-
Notifications
You must be signed in to change notification settings - Fork 3
/
data_frame_impl.dart
124 lines (107 loc) · 3.85 KB
/
data_frame_impl.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import 'package:ml_dataframe/src/data_frame/data_frame.dart';
import 'package:ml_dataframe/src/data_frame/data_frame_helpers.dart';
import 'package:ml_dataframe/src/data_frame/series.dart';
import 'package:ml_dataframe/src/numerical_converter/numerical_converter.dart';
import 'package:ml_linalg/dtype.dart';
import 'package:ml_linalg/linalg.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:quiver/iterables.dart';
/// A [DataFrame] implementation that exposes the same data as [rows],
/// [header] and [series].
///
/// Matrices produced by [toMatrix] are cached per [DType], and the
/// name-to-series lookup backing [operator []] is built on first use.
class DataFrameImpl implements DataFrame {
  /// Creates a data frame from row-wise data and the matching [header].
  ///
  /// [series] is derived by `convertRowsToSeries(header, rows)`.
  DataFrameImpl(this.rows, this.header, this._toNumber)
      : series = convertRowsToSeries(header, rows);

  /// Creates a data frame from column-wise data.
  ///
  /// [header] is a lazy view over the series names and [rows] is derived by
  /// `convertSeriesToRows(series)`.
  DataFrameImpl.fromSeries(this.series, this._toNumber)
      : header = series.map((column) => column.name),
        rows = convertSeriesToRows(series);

  /// Creates a data frame on top of an existing [matrix].
  ///
  /// [areSeriesDiscrete] supplies the `isDiscrete` flag for each column; when
  /// it is `null`, every column is treated as non-discrete. The given
  /// [matrix] is stored in the cache under its own dtype, so [toMatrix]
  /// returns it as is when called with that dtype.
  DataFrameImpl.fromMatrix(
    Matrix matrix,
    this.header,
    this._toNumber,
    Iterable<bool> areSeriesDiscrete,
  )   : rows = matrix.rows,
        // `Iterable.map` is lazy: the closure below runs again on every
        // traversal of [series], constructing a fresh `Series` per pass.
        series = zip([
          header,
          matrix.columns,
          areSeriesDiscrete ?? List.filled(matrix.columnsNum, false),
        ]).map((seriesData) => Series(
              seriesData[0] as String,
              seriesData[1] as Iterable,
              isDiscrete: seriesData[2] as bool,
            )) {
    _cachedMatrices[matrix.dtype] = matrix;
  }

  @override
  final Iterable<String> header;

  @override
  final Iterable<Iterable<dynamic>> rows;

  @override
  final Iterable<Series> series;

  /// Converter used by [toMatrix] to turn raw [rows] into numeric data.
  final NumericalConverter _toNumber;

  /// Matrices already built by [toMatrix] (or supplied to
  /// [DataFrameImpl.fromMatrix]), keyed by their dtype.
  final Map<DType, Matrix> _cachedMatrices = {};

  /// Lookup from a series name to the series itself.
  ///
  /// Stays unset until [_getCachedOrCreateSeriesByName] is first called.
  Map<String, Series> _seriesByName;

  /// Returns the series addressed by [key].
  ///
  /// An `int` key is treated as a position in [header] and translated to the
  /// name found there; any other key is used directly as the series name.
  /// The result is whatever the name lookup map holds for that name.
  @override
  Series operator [](Object key) {
    final seriesName = key is int ? header.elementAt(key) : key;

    return _getCachedOrCreateSeriesByName()[seriesName];
  }

  /// Builds a new data frame out of the selected series.
  ///
  /// When [indices] is not empty it takes precedence and [names] is ignored.
  ///
  /// Throws a [RangeError] if any of [indices] lies outside
  /// `0..series.length - 1`, and an [Exception] if any of [names] is absent
  /// from [header].
  @override
  DataFrame sampleFromSeries({
    Iterable<int> indices = const [],
    Iterable<String> names = const [],
  }) {
    if (indices.isNotEmpty) {
      final maxIdx = series.length - 1;
      final outRangedIndices =
          indices.where((idx) => idx < 0 || idx > maxIdx);

      if (outRangedIndices.isNotEmpty) {
        throw RangeError('Some of provided indices are out of range: '
            '$outRangedIndices, while the valid range is 0..$maxIdx (both '
            'inclusive)');
      }

      return _sampleFromSeries(indices);
    }

    final absentNames =
        Set<String>.from(names).difference(Set.from(header));

    if (absentNames.isNotEmpty) {
      throw Exception('Columns with names $absentNames do not exist');
    }

    return _sampleFromSeries(names);
  }

  /// Builds a new data frame without the specified series.
  ///
  /// When [seriesIndices] is not empty the series are dropped by position and
  /// [seriesNames] is ignored; otherwise they are dropped by name. Indices or
  /// names that match no series have no effect.
  @override
  DataFrame dropSeries({
    Iterable<int> seriesIndices = const [],
    Iterable<String> seriesNames = const [],
  }) {
    if (seriesIndices.isNotEmpty) {
      return _dropByIndices(seriesIndices, series);
    }

    return _dropByNames(seriesNames, series);
  }

  /// Returns the data as a [Matrix] of the given [dtype].
  ///
  /// The matrix is built from [rows] passed through the numerical converter
  /// and is cached, so repeated calls with the same [dtype] reuse it.
  @override
  Matrix toMatrix([DType dtype = DType.float32]) =>
      _cachedMatrices[dtype] ??= Matrix.fromList(
        _toNumber.convertRawData(rows).map((row) => row.toList()).toList(),
        dtype: dtype,
      );

  /// Resolves every element of [ids] (positions or names) through
  /// [operator []] and wraps the resulting series into a new data frame.
  DataFrame _sampleFromSeries(Iterable ids) =>
      DataFrame.fromSeries(ids.map((dynamic id) => this[id]));

  /// Returns a data frame of those [source] series whose position is not
  /// listed in [indices].
  DataFrame _dropByIndices(Iterable<int> indices, Iterable<Series> source) {
    final uniqueIndices = Set<int>.from(indices);
    final newSeries = enumerate(source)
        .where((indexed) => !uniqueIndices.contains(indexed.index))
        .map((indexed) => indexed.value);

    return DataFrame.fromSeries(newSeries);
  }

  /// Returns a data frame of those [source] series whose name is not listed
  /// in [names].
  DataFrame _dropByNames(Iterable<String> names, Iterable<Series> source) {
    final uniqueNames = Set<String>.from(names);
    final newSeries =
        source.where((column) => !uniqueNames.contains(column.name));

    return DataFrame.fromSeries(newSeries);
  }

  /// Returns the name-to-series lookup, building it from [series] on the
  /// first call and reusing it afterwards.
  Map<String, Series> _getCachedOrCreateSeriesByName() =>
      _seriesByName ??= Map.fromEntries(
          series.map((column) => MapEntry(column.name, column)));
}