-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
DataFrame.IDataView.cs
141 lines (120 loc) · 4.57 KB
/
DataFrame.IDataView.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Microsoft.Data.Analysis
{
public partial class DataFrame : IDataView
{
/// <summary>
/// Always <c>false</c>: this view does not advertise shuffling support.
/// </summary>
// TODO: support shuffling
bool IDataView.CanShuffle
{
    get { return false; }
}
// Cached schema; remains null until the DataViewSchema property builds it.
private DataViewSchema _schema;

/// <summary>
/// Schema describing this frame's columns. It is built on demand from <c>Columns</c>
/// and returned from the cache whenever <c>_schema</c> is non-null.
/// </summary>
private DataViewSchema DataViewSchema
{
    get
    {
        if (_schema == null)
        {
            var builder = new DataViewSchema.Builder();

            // Each column contributes its own entry, in column order.
            for (int index = 0; index < Columns.Count; index++)
            {
                Columns[index].AddDataViewColumn(builder);
            }

            _schema = builder.ToSchema();
        }

        return _schema;
    }
}
/// <summary>
/// Exposes the private <c>DataViewSchema</c> property through <see cref="IDataView"/>.
/// </summary>
DataViewSchema IDataView.Schema
{
    get { return DataViewSchema; }
}
/// <summary>
/// Reports the number of rows in this frame, taken from <c>Rows.Count</c>.
/// </summary>
/// <returns>The row count; this implementation never returns <c>null</c>.</returns>
long? IDataView.GetRowCount()
{
    return Rows.Count;
}
/// <summary>
/// Builds a <see cref="RowCursor"/> over this frame with the requested columns activated.
/// </summary>
/// <param name="columnsNeeded">Columns the caller wants getters for.</param>
/// <returns>A new cursor positioned before the first row.</returns>
private DataViewRowCursor GetRowCursorCore(IEnumerable<DataViewSchema.Column> columnsNeeded)
{
    int schemaWidth = DataViewSchema.Count;
    var isActive = new bool[schemaWidth];

    foreach (DataViewSchema.Column needed in columnsNeeded)
    {
        int index = needed.Index;

        // Columns whose index lies beyond this schema are silently skipped.
        if (index >= schemaWidth)
        {
            continue;
        }

        isActive[index] = true;
    }

    return new RowCursor(this, isActive);
}
/// <summary>
/// Returns a single sequential cursor over the requested columns.
/// </summary>
/// <param name="columnsNeeded">Columns the caller wants getters for.</param>
/// <param name="rand">Ignored by this implementation; rows are always visited in order.</param>
DataViewRowCursor IDataView.GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand)
    => GetRowCursorCore(columnsNeeded);
/// <summary>
/// Returns a cursor set that always contains exactly one sequential cursor.
/// </summary>
/// <param name="columnsNeeded">Columns the caller wants getters for.</param>
/// <param name="n">Ignored by this implementation; a single cursor is returned regardless.</param>
/// <param name="rand">Ignored by this implementation.</param>
DataViewRowCursor[] IDataView.GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand)
{
    // TODO: change to support parallel cursors
    DataViewRowCursor singleCursor = GetRowCursorCore(columnsNeeded);
    return new[] { singleCursor };
}
/// <summary>
/// Forward-only cursor over the rows of a <see cref="DataFrame"/>. Value getters are created
/// only for the columns flagged active when the cursor is constructed.
/// </summary>
private sealed class RowCursor : DataViewRowCursor
{
    private bool _disposed;

    // Index of the current row; -1 before the first MoveNext call and after disposal.
    private long _position;

    private readonly DataFrame _dataFrame;

    // One slot per schema column; a null slot marks an inactive column.
    private readonly Delegate[] _getters;

    /// <summary>
    /// Creates a cursor positioned before the first row of <paramref name="dataFrame"/>.
    /// </summary>
    /// <param name="dataFrame">Frame whose rows are enumerated.</param>
    /// <param name="activeColumns">
    /// Per-column flags, indexed by schema position; a getter is created for each <c>true</c> entry.
    /// </param>
    public RowCursor(DataFrame dataFrame, bool[] activeColumns)
    {
        Debug.Assert(dataFrame != null);
        Debug.Assert(activeColumns != null);

        _position = -1;
        _dataFrame = dataFrame;
        _getters = new Delegate[Schema.Count];

        for (int i = 0; i < _getters.Length; i++)
        {
            if (!activeColumns[i])
            {
                continue;
            }

            _getters[i] = CreateGetterDelegate(i);
            Debug.Assert(_getters[i] != null);
        }
    }

    /// <summary>Index of the current row, or -1 before the first <see cref="MoveNext"/> call.</summary>
    public override long Position => _position;

    /// <summary>Always 0: this cursor reports a single batch.</summary>
    public override long Batch => 0;

    /// <summary>Schema of the underlying data frame.</summary>
    public override DataViewSchema Schema => _dataFrame.DataViewSchema;

    /// <summary>
    /// Marks the cursor as disposed, after which <see cref="MoveNext"/> returns <c>false</c>.
    /// The position is reset to -1 when disposing explicitly.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            _position = -1;
        }

        _disposed = true;
        base.Dispose(disposing);
    }

    // Asks the data-frame column at the given schema index for its getter delegate bound to this cursor.
    private Delegate CreateGetterDelegate(int col)
    {
        DataFrameColumn column = _dataFrame.Columns[col];
        return column.GetDataViewGetter(this);
    }

    /// <summary>
    /// Returns the value getter for an active column.
    /// </summary>
    /// <typeparam name="TValue">Value type the caller expects the column to produce.</typeparam>
    /// <param name="column">Column to read; must be active on this cursor.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="column"/> is not active, or its index lies outside this cursor's schema.
    /// </exception>
    /// <exception cref="InvalidCastException">
    /// The stored getter is not a <c>ValueGetter&lt;TValue&gt;</c>.
    /// </exception>
    public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
    {
        if (!IsColumnActive(column))
        {
            throw new ArgumentOutOfRangeException(nameof(column));
        }

        return (ValueGetter<TValue>)_getters[column.Index];
    }

    /// <summary>
    /// Returns a getter that produces a row id whose first component is the current position.
    /// </summary>
    public override ValueGetter<DataViewRowId> GetIdGetter()
    {
        return (ref DataViewRowId value) => value = new DataViewRowId((ulong)_position, 0);
    }

    /// <summary>
    /// Tells whether a getter was created for <paramref name="column"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the column has a getter; <c>false</c> when it is inactive or when its
    /// index does not fall inside this cursor's schema.
    /// </returns>
    public override bool IsColumnActive(DataViewSchema.Column column)
    {
        int index = column.Index;

        // Bounds-check first so a column from a wider schema reads as inactive
        // instead of raising IndexOutOfRangeException.
        return index >= 0 && index < _getters.Length && _getters[index] != null;
    }

    /// <summary>
    /// Advances to the next row.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the cursor now points at a valid row; <c>false</c> when the cursor is
    /// disposed or there are no more rows.
    /// </returns>
    public override bool MoveNext()
    {
        if (_disposed)
        {
            return false;
        }

        long rowCount = _dataFrame.Rows.Count;

        // Advance only while still inside the data, so repeated calls past the end
        // leave Position parked at the row count instead of growing without bound.
        if (_position < rowCount)
        {
            _position++;
        }

        return _position < rowCount;
    }
}
}
}