/
DataOperationsCatalog.xml
579 lines (578 loc) · 40.6 KB
/
DataOperationsCatalog.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
<Type Name="DataOperationsCatalog" FullName="Microsoft.ML.DataOperationsCatalog">
<TypeSignature Language="C#" Value="public sealed class DataOperationsCatalog" />
<TypeSignature Language="ILAsm" Value=".class public auto ansi sealed beforefieldinit DataOperationsCatalog extends System.Object" />
<TypeSignature Language="DocId" Value="T:Microsoft.ML.DataOperationsCatalog" />
<TypeSignature Language="VB.NET" Value="Public NotInheritable Class DataOperationsCatalog" />
<TypeSignature Language="F#" Value="type DataOperationsCatalog = class" />
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>System.Object</BaseTypeName>
</Base>
<Interfaces />
<Docs>
<summary>
Class used to create components that operate on data, but are not part of the model training pipeline.
Includes components to load, save, cache, filter, shuffle, and split data.
</summary>
<remarks>To be added.</remarks>
</Docs>
<Members>
<Member MemberName="BootstrapSample">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView BootstrapSample (Microsoft.ML.IDataView input, int? seed = default, bool complement = false);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView BootstrapSample(class Microsoft.ML.IDataView input, valuetype System.Nullable`1&lt;int32&gt; seed, bool complement) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.BootstrapSample(Microsoft.ML.IDataView,System.Nullable{System.Int32},System.Boolean)" />
<MemberSignature Language="VB.NET" Value="Public Function BootstrapSample (input As IDataView, Optional seed As Nullable(Of Integer) = Nothing, Optional complement As Boolean = false) As IDataView" />
<MemberSignature Language="F#" Value="member this.BootstrapSample : Microsoft.ML.IDataView * Nullable&lt;int&gt; * bool -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.BootstrapSample (input, seed, complement)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="seed" Type="System.Nullable&lt;System.Int32&gt;" />
<Parameter Name="complement" Type="System.Boolean" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="seed">The random seed. If unspecified, the random state will be instead derived from the <see cref="T:Microsoft.ML.MLContext" />.</param>
<param name="complement">Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.
Can be used to create a complementary pair of samples by using the same seed.</param>
<summary>
Take an approximate bootstrap sample of <paramref name="input" />.
</summary>
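<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing the approximate bootstrap sample of <paramref name="input" />, or the complementary out-of-bag sample when <paramref name="complement" /> is <see langword="true" />.</returns>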
<remarks>
This sampler is a streaming version of <a href="https://en.wikipedia.org/wiki/Bootstrapping_(statistics)">bootstrap resampling</a>.
Instead of taking the whole dataset into memory and resampling, <see cref="M:Microsoft.ML.DataOperationsCatalog.BootstrapSample(Microsoft.ML.IDataView,System.Nullable{System.Int32},System.Boolean)" /> streams through the dataset and
uses a <a href="https://en.wikipedia.org/wiki/Poisson_distribution">Poisson</a>(1) distribution to select the number of times a
given row will be added to the sample. The <paramref name="complement" /> parameter allows for the creation of a bootstrap sample
and complementary out-of-bag sample by using the same <paramref name="seed" />.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[BootstrapSample](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="Cache">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView Cache (Microsoft.ML.IDataView input, params string[] columnsToPrefetch);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView Cache(class Microsoft.ML.IDataView input, string[] columnsToPrefetch) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.Cache(Microsoft.ML.IDataView,System.String[])" />
<MemberSignature Language="VB.NET" Value="Public Function Cache (input As IDataView, ParamArray columnsToPrefetch As String()) As IDataView" />
<MemberSignature Language="F#" Value="member this.Cache : Microsoft.ML.IDataView * string[] -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.Cache (input, columnsToPrefetch)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="columnsToPrefetch" Type="System.String[]">
<Attributes>
<Attribute>
<AttributeName Language="C#">[System.ParamArray]</AttributeName>
<AttributeName Language="F#">[&lt;System.ParamArray&gt;]</AttributeName>
</Attribute>
</Attributes>
</Parameter>
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="columnsToPrefetch">The columns that must be cached whenever anything is cached. An empty array or null
value means that columns are cached upon their first access.</param>
<summary>
Creates a lazy in-memory cache of <paramref name="input" />.
</summary>
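<returns>An <see cref="T:Microsoft.ML.IDataView" /> that lazily caches the columns of <paramref name="input" /> in memory.</returns>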
<remarks>
Caching happens per-column. A column is only cached when it is first accessed.
In addition, <paramref name="columnsToPrefetch" /> are considered 'always needed', so these columns
will be cached the first time any data is requested.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[Cache](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="CreateEnumerable&lt;TRow&gt;">
<MemberSignature Language="C#" Value="public System.Collections.Generic.IEnumerable&lt;TRow&gt; CreateEnumerable&lt;TRow&gt; (Microsoft.ML.IDataView data, bool reuseRowObject, bool ignoreMissingColumns = false, Microsoft.ML.Data.SchemaDefinition schemaDefinition = default) where TRow : class, new();" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class System.Collections.Generic.IEnumerable`1&lt;!!TRow&gt; CreateEnumerable&lt;class .ctor TRow&gt;(class Microsoft.ML.IDataView data, bool reuseRowObject, bool ignoreMissingColumns, class Microsoft.ML.Data.SchemaDefinition schemaDefinition) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.CreateEnumerable``1(Microsoft.ML.IDataView,System.Boolean,System.Boolean,Microsoft.ML.Data.SchemaDefinition)" />
<MemberSignature Language="VB.NET" Value="Public Function CreateEnumerable(Of TRow As {Class, New}) (data As IDataView, reuseRowObject As Boolean, Optional ignoreMissingColumns As Boolean = false, Optional schemaDefinition As SchemaDefinition = Nothing) As IEnumerable(Of TRow)" />
<MemberSignature Language="F#" Value="member this.CreateEnumerable : Microsoft.ML.IDataView * bool * bool * Microsoft.ML.Data.SchemaDefinition -> seq&lt;'Row (requires 'Row : null and 'Row : (new : unit -> 'Row))&gt; (requires 'Row : null and 'Row : (new : unit -> 'Row))" Usage="dataOperationsCatalog.CreateEnumerable (data, reuseRowObject, ignoreMissingColumns, schemaDefinition)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Collections.Generic.IEnumerable&lt;TRow&gt;</ReturnType>
</ReturnValue>
<TypeParameters>
<TypeParameter Name="TRow">
<Constraints>
<ParameterAttribute>DefaultConstructorConstraint</ParameterAttribute>
<ParameterAttribute>ReferenceTypeConstraint</ParameterAttribute>
</Constraints>
</TypeParameter>
</TypeParameters>
<Parameters>
<Parameter Name="data" Type="Microsoft.ML.IDataView" />
<Parameter Name="reuseRowObject" Type="System.Boolean" />
<Parameter Name="ignoreMissingColumns" Type="System.Boolean" />
<Parameter Name="schemaDefinition" Type="Microsoft.ML.Data.SchemaDefinition" />
</Parameters>
<Docs>
<typeparam name="TRow">The user-defined item type.</typeparam>
<param name="data">The underlying data view.</param>
<param name="reuseRowObject">Whether to return the same object on every row, or allocate a new one per row.</param>
<param name="ignoreMissingColumns">Whether to ignore the case when a requested column is not present in the data view.</param>
<param name="schemaDefinition">Optional user-provided schema definition. If it is not present, the schema is inferred from the definition of <typeparamref name="TRow" />.</param>
<summary>
Convert an <see cref="T:Microsoft.ML.IDataView" /> into a strongly-typed <see cref="T:System.Collections.Generic.IEnumerable`1" />.
</summary>
<returns>The <see cref="T:System.Collections.Generic.IEnumerable`1" /> that holds the data in <paramref name="data" />. It can be enumerated multiple times.</returns>
<remarks>To be added.</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[CreateEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="CrossValidationSplit">
<MemberSignature Language="C#" Value="public System.Collections.Generic.IReadOnlyList&lt;Microsoft.ML.DataOperationsCatalog.TrainTestData&gt; CrossValidationSplit (Microsoft.ML.IDataView data, int numberOfFolds = 5, string samplingKeyColumnName = default, int? seed = default);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class System.Collections.Generic.IReadOnlyList`1&lt;valuetype Microsoft.ML.DataOperationsCatalog/TrainTestData&gt; CrossValidationSplit(class Microsoft.ML.IDataView data, int32 numberOfFolds, string samplingKeyColumnName, valuetype System.Nullable`1&lt;int32&gt; seed) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.CrossValidationSplit(Microsoft.ML.IDataView,System.Int32,System.String,System.Nullable{System.Int32})" />
<MemberSignature Language="VB.NET" Value="Public Function CrossValidationSplit (data As IDataView, Optional numberOfFolds As Integer = 5, Optional samplingKeyColumnName As String = Nothing, Optional seed As Nullable(Of Integer) = Nothing) As IReadOnlyList(Of DataOperationsCatalog.TrainTestData)" />
<MemberSignature Language="F#" Value="member this.CrossValidationSplit : Microsoft.ML.IDataView * int * string * Nullable&lt;int&gt; -> System.Collections.Generic.IReadOnlyList&lt;Microsoft.ML.DataOperationsCatalog.TrainTestData&gt;" Usage="dataOperationsCatalog.CrossValidationSplit (data, numberOfFolds, samplingKeyColumnName, seed)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Collections.Generic.IReadOnlyList&lt;Microsoft.ML.DataOperationsCatalog+TrainTestData&gt;</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="data" Type="Microsoft.ML.IDataView" />
<Parameter Name="numberOfFolds" Type="System.Int32" />
<Parameter Name="samplingKeyColumnName" Type="System.String" />
<Parameter Name="seed" Type="System.Nullable&lt;System.Int32&gt;" />
</Parameters>
<Docs>
<param name="data">The dataset to split.</param>
<param name="numberOfFolds">Number of cross-validation folds.</param>
<param name="samplingKeyColumnName">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumnName" />,
they are guaranteed to appear in the same subset (train or test). This can be used to ensure no label leakage from the train to the test set.
Note that when performing a Ranking Experiment, the <paramref name="samplingKeyColumnName" /> must be the GroupId column.
If <see langword="null" />, no row grouping will be performed.</param>
<param name="seed">Seed for the random number generator used to select rows for cross-validation folds.</param>
<summary>
Split the dataset into cross-validation folds of train set and test set.
Respects the <paramref name="samplingKeyColumnName" /> if provided.
</summary>
<returns>The train set and test set for each cross-validation fold.</returns>
<remarks>To be added.</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[CrossValidationSplit](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="FilterRowsByColumn">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView FilterRowsByColumn (Microsoft.ML.IDataView input, string columnName, double lowerBound = -∞, double upperBound = ∞);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView FilterRowsByColumn(class Microsoft.ML.IDataView input, string columnName, float64 lowerBound, float64 upperBound) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.FilterRowsByColumn(Microsoft.ML.IDataView,System.String,System.Double,System.Double)" />
<MemberSignature Language="VB.NET" Value="Public Function FilterRowsByColumn (input As IDataView, columnName As String, Optional lowerBound As Double = -∞, Optional upperBound As Double = ∞) As IDataView" />
<MemberSignature Language="F#" Value="member this.FilterRowsByColumn : Microsoft.ML.IDataView * string * double * double -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.FilterRowsByColumn (input, columnName, lowerBound, upperBound)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="columnName" Type="System.String" />
<Parameter Name="lowerBound" Type="System.Double" />
<Parameter Name="upperBound" Type="System.Double" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="columnName">The name of a column to use for filtering.</param>
<param name="lowerBound">The inclusive lower bound.</param>
<param name="upperBound">The exclusive upper bound.</param>
<summary>
Filter the dataset by the values of a numeric column.
</summary>
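<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing only the rows of <paramref name="input" /> whose value in <paramref name="columnName" /> satisfies the range condition.</returns>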
<remarks>
Keep only those rows that satisfy the range condition: the value of column <paramref name="columnName" />
must be between <paramref name="lowerBound" /> (inclusive) and <paramref name="upperBound" /> (exclusive).
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[FilterRowsByColumn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="FilterRowsByKeyColumnFraction">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView FilterRowsByKeyColumnFraction (Microsoft.ML.IDataView input, string columnName, double lowerBound = 0, double upperBound = 1);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView FilterRowsByKeyColumnFraction(class Microsoft.ML.IDataView input, string columnName, float64 lowerBound, float64 upperBound) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.FilterRowsByKeyColumnFraction(Microsoft.ML.IDataView,System.String,System.Double,System.Double)" />
<MemberSignature Language="VB.NET" Value="Public Function FilterRowsByKeyColumnFraction (input As IDataView, columnName As String, Optional lowerBound As Double = 0, Optional upperBound As Double = 1) As IDataView" />
<MemberSignature Language="F#" Value="member this.FilterRowsByKeyColumnFraction : Microsoft.ML.IDataView * string * double * double -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.FilterRowsByKeyColumnFraction (input, columnName, lowerBound, upperBound)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="columnName" Type="System.String" />
<Parameter Name="lowerBound" Type="System.Double" />
<Parameter Name="upperBound" Type="System.Double" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="columnName">The name of a column to use for filtering.</param>
<param name="lowerBound">The inclusive lower bound.</param>
<param name="upperBound">The exclusive upper bound.</param>
<summary>
Filter the dataset by the values of a <see cref="T:Microsoft.ML.Data.KeyDataViewType" /> column.
</summary>
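<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing only the rows of <paramref name="input" /> whose key value in <paramref name="columnName" />, treated as a fraction of the entire key range, satisfies the range condition.</returns>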
<remarks>
Keep only those rows that satisfy the range condition: the value of a key column <paramref name="columnName" />
(treated as a fraction of the entire key range) must be between <paramref name="lowerBound" /> (inclusive) and <paramref name="upperBound" /> (exclusive).
This filtering is useful if the <paramref name="columnName" /> is a key column obtained by some 'stable randomization',
for example, hashing.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[FilterRowsByKeyColumnFraction](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="FilterRowsByMissingValues">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView FilterRowsByMissingValues (Microsoft.ML.IDataView input, params string[] columns);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView FilterRowsByMissingValues(class Microsoft.ML.IDataView input, string[] columns) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.FilterRowsByMissingValues(Microsoft.ML.IDataView,System.String[])" />
<MemberSignature Language="VB.NET" Value="Public Function FilterRowsByMissingValues (input As IDataView, ParamArray columns As String()) As IDataView" />
<MemberSignature Language="F#" Value="member this.FilterRowsByMissingValues : Microsoft.ML.IDataView * string[] -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.FilterRowsByMissingValues (input, columns)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="columns" Type="System.String[]">
<Attributes>
<Attribute>
<AttributeName Language="C#">[System.ParamArray]</AttributeName>
<AttributeName Language="F#">[&lt;System.ParamArray&gt;]</AttributeName>
</Attribute>
</Attributes>
</Parameter>
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="columns">Names of the columns to filter on. If a row has a missing value in any of
these columns, it will be dropped from the dataset.</param>
<summary>
Drop rows where any column in <paramref name="columns" /> contains a missing value.
</summary>
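<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing only the rows of <paramref name="input" /> that have no missing value in any of the <paramref name="columns" />.</returns>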
<remarks>To be added.</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[FilterRowsByMissingValues](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="LoadFromEnumerable&lt;TRow&gt;">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView LoadFromEnumerable&lt;TRow&gt; (System.Collections.Generic.IEnumerable&lt;TRow&gt; data, Microsoft.ML.Data.SchemaDefinition schemaDefinition = default) where TRow : class;" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView LoadFromEnumerable&lt;class TRow&gt;(class System.Collections.Generic.IEnumerable`1&lt;!!TRow&gt; data, class Microsoft.ML.Data.SchemaDefinition schemaDefinition) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.LoadFromEnumerable``1(System.Collections.Generic.IEnumerable{``0},Microsoft.ML.Data.SchemaDefinition)" />
<MemberSignature Language="VB.NET" Value="Public Function LoadFromEnumerable(Of TRow As Class) (data As IEnumerable(Of TRow), Optional schemaDefinition As SchemaDefinition = Nothing) As IDataView" />
<MemberSignature Language="F#" Value="member this.LoadFromEnumerable : seq&lt;'Row (requires 'Row : null)&gt; * Microsoft.ML.Data.SchemaDefinition -> Microsoft.ML.IDataView (requires 'Row : null)" Usage="dataOperationsCatalog.LoadFromEnumerable (data, schemaDefinition)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<TypeParameters>
<TypeParameter Name="TRow">
<Constraints>
<ParameterAttribute>ReferenceTypeConstraint</ParameterAttribute>
</Constraints>
</TypeParameter>
</TypeParameters>
<Parameters>
<Parameter Name="data" Type="System.Collections.Generic.IEnumerable&lt;TRow&gt;" />
<Parameter Name="schemaDefinition" Type="Microsoft.ML.Data.SchemaDefinition" />
</Parameters>
<Docs>
<typeparam name="TRow">The user-defined item type.</typeparam>
<param name="data">The enumerable data containing type <typeparamref name="TRow" /> to convert to a <see cref="T:Microsoft.ML.IDataView" />.</param>
<param name="schemaDefinition">The optional schema definition of the data view to create. If <see langword="null" />,
the schema definition is inferred from <typeparamref name="TRow" />.</param>
<summary>
Create a new <see cref="T:Microsoft.ML.IDataView" /> over an enumerable of the items of user-defined type.
The user maintains ownership of the <paramref name="data" /> and the resulting data view will
never alter the contents of the <paramref name="data" />.
Since <see cref="T:Microsoft.ML.IDataView" /> is assumed to be immutable, the user is expected to support
multiple enumerations of the <paramref name="data" /> that would return the same results, unless
the user knows that the data will only be cursored once.
One typical usage for streaming data view could be: create the data view that lazily loads data
as needed, then apply pre-trained transformations to it and cursor through it for transformation
results.
</summary>
<returns>The constructed <see cref="T:Microsoft.ML.IDataView" />.</returns>
<remarks>To be added.</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="LoadFromEnumerable&lt;TRow&gt;">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView LoadFromEnumerable&lt;TRow&gt; (System.Collections.Generic.IEnumerable&lt;TRow&gt; data, Microsoft.ML.DataViewSchema schema) where TRow : class;" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView LoadFromEnumerable&lt;class TRow&gt;(class System.Collections.Generic.IEnumerable`1&lt;!!TRow&gt; data, class Microsoft.ML.DataViewSchema schema) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.LoadFromEnumerable``1(System.Collections.Generic.IEnumerable{``0},Microsoft.ML.DataViewSchema)" />
<MemberSignature Language="VB.NET" Value="Public Function LoadFromEnumerable(Of TRow As Class) (data As IEnumerable(Of TRow), schema As DataViewSchema) As IDataView" />
<MemberSignature Language="F#" Value="member this.LoadFromEnumerable : seq&lt;'Row (requires 'Row : null)&gt; * Microsoft.ML.DataViewSchema -> Microsoft.ML.IDataView (requires 'Row : null)" Usage="dataOperationsCatalog.LoadFromEnumerable (data, schema)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<TypeParameters>
<TypeParameter Name="TRow">
<Constraints>
<ParameterAttribute>ReferenceTypeConstraint</ParameterAttribute>
</Constraints>
</TypeParameter>
</TypeParameters>
<Parameters>
<Parameter Name="data" Type="System.Collections.Generic.IEnumerable&lt;TRow&gt;" />
<Parameter Name="schema" Type="Microsoft.ML.DataViewSchema" />
</Parameters>
<Docs>
<typeparam name="TRow">The user-defined item type.</typeparam>
<param name="data">The enumerable data containing type <typeparamref name="TRow" /> to convert to an <see cref="T:Microsoft.ML.IDataView" />.</param>
<param name="schema">The schema of the returned <see cref="T:Microsoft.ML.IDataView" />.</param>
<summary>
Create a new <see cref="T:Microsoft.ML.IDataView" /> over an enumerable of the items of user-defined type using the provided <see cref="T:Microsoft.ML.DataViewSchema" />,
which might contain more information about the schema than the type can capture.
</summary>
<returns>An <see cref="T:Microsoft.ML.IDataView" /> with the given <paramref name="schema" />.</returns>
<remarks>
The user maintains ownership of the <paramref name="data" /> and the resulting data view will
never alter the contents of the <paramref name="data" />.
Since <see cref="T:Microsoft.ML.IDataView" /> is assumed to be immutable, the user is expected to support
multiple enumerations of the <paramref name="data" /> that would return the same results, unless
the user knows that the data will only be cursored once.
One typical usage for streaming data view could be: create the data view that lazily loads data
as needed, then apply pre-trained transformations to it and cursor through it for transformation
results.
One practical usage of this would be to supply the feature column names through the <see cref="T:Microsoft.ML.DataViewSchema.Annotations" />.
</remarks>
</Docs>
</Member>
<Member MemberName="ShuffleRows">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView ShuffleRows (Microsoft.ML.IDataView input, int? seed = default, int shufflePoolSize = 1000, bool shuffleSource = true);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView ShuffleRows(class Microsoft.ML.IDataView input, valuetype System.Nullable`1&lt;int32&gt; seed, int32 shufflePoolSize, bool shuffleSource) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.ShuffleRows(Microsoft.ML.IDataView,System.Nullable{System.Int32},System.Int32,System.Boolean)" />
<MemberSignature Language="VB.NET" Value="Public Function ShuffleRows (input As IDataView, Optional seed As Nullable(Of Integer) = Nothing, Optional shufflePoolSize As Integer = 1000, Optional shuffleSource As Boolean = true) As IDataView" />
<MemberSignature Language="F#" Value="member this.ShuffleRows : Microsoft.ML.IDataView * Nullable&lt;int&gt; * int * bool -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.ShuffleRows (input, seed, shufflePoolSize, shuffleSource)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="seed" Type="System.Nullable&lt;System.Int32&gt;" />
<Parameter Name="shufflePoolSize" Type="System.Int32" />
<Parameter Name="shuffleSource" Type="System.Boolean" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="seed">The random seed. If unspecified, the random state will be instead derived from the <see cref="T:Microsoft.ML.MLContext" />.</param>
<param name="shufflePoolSize">The number of rows to hold in the pool. Setting this to 1 will turn off pool shuffling and
<see cref="M:Microsoft.ML.DataOperationsCatalog.ShuffleRows(Microsoft.ML.IDataView,System.Nullable{System.Int32},System.Int32,System.Boolean)" /> will only perform a shuffle by reading <paramref name="input" /> in a random order.</param>
<param name="shuffleSource">If <see langword="false" />, the transform will not attempt to read <paramref name="input" /> in a random order and only use
pooling to shuffle. This parameter has no effect if the <see cref="P:Microsoft.ML.IDataView.CanShuffle" /> property of <paramref name="input" /> is <see langword="false" />.
</param>
<summary>
Shuffle the rows of <paramref name="input" />.
</summary>
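<returns>An <see cref="T:Microsoft.ML.IDataView" /> of the same size as <paramref name="input" /> but with the rows in a randomized order.</returns>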
<remarks>
<see cref="M:Microsoft.ML.DataOperationsCatalog.ShuffleRows(Microsoft.ML.IDataView,System.Nullable{System.Int32},System.Int32,System.Boolean)" /> will shuffle the rows of any input <see cref="T:Microsoft.ML.IDataView" /> using a streaming approach.
In order to not load the entire dataset in memory, a pool of <paramref name="shufflePoolSize" /> rows will be used
to randomly select rows to output. The pool is constructed from the first <paramref name="shufflePoolSize" /> rows
in <paramref name="input" />. Rows will then be randomly yielded from the pool and replaced with the next row from <paramref name="input" />
until all the rows have been yielded, resulting in a new <see cref="T:Microsoft.ML.IDataView" /> of the same size as <paramref name="input" />
but with the rows in a randomized order.
If the <see cref="P:Microsoft.ML.IDataView.CanShuffle" /> property of <paramref name="input" /> is <see langword="true" />, then <paramref name="input" /> will also be read into the
pool in a random order, offering two sources of randomness.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[ShuffleRows](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="SkipRows">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView SkipRows (Microsoft.ML.IDataView input, long count);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView SkipRows(class Microsoft.ML.IDataView input, int64 count) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.SkipRows(Microsoft.ML.IDataView,System.Int64)" />
<MemberSignature Language="VB.NET" Value="Public Function SkipRows (input As IDataView, count As Long) As IDataView" />
<MemberSignature Language="F#" Value="member this.SkipRows : Microsoft.ML.IDataView * int64 -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.SkipRows (input, count)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="count" Type="System.Int64" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="count">Number of rows to skip.</param>
<summary>
Skip <paramref name="count" /> rows in <paramref name="input" />.
</summary>
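<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing all rows of <paramref name="input" /> except the first <paramref name="count" /> rows.</returns>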
<remarks>
Skips the first <paramref name="count" /> rows from <paramref name="input" /> and returns an <see cref="T:Microsoft.ML.IDataView" /> with all other rows.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[SkipRows](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="TakeRows">
<MemberSignature Language="C#" Value="public Microsoft.ML.IDataView TakeRows (Microsoft.ML.IDataView input, long count);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance class Microsoft.ML.IDataView TakeRows(class Microsoft.ML.IDataView input, int64 count) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.TakeRows(Microsoft.ML.IDataView,System.Int64)" />
<MemberSignature Language="VB.NET" Value="Public Function TakeRows (input As IDataView, count As Long) As IDataView" />
<MemberSignature Language="F#" Value="member this.TakeRows : Microsoft.ML.IDataView * int64 -> Microsoft.ML.IDataView" Usage="dataOperationsCatalog.TakeRows (input, count)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.IDataView</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="input" Type="Microsoft.ML.IDataView" />
<Parameter Name="count" Type="System.Int64" />
</Parameters>
<Docs>
<param name="input">The input data.</param>
<param name="count">Number of rows to take.</param>
<summary>
Take <paramref name="count" /> rows from <paramref name="input" />.
</summary>
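<returns>An <see cref="T:Microsoft.ML.IDataView" /> containing the first <paramref name="count" /> rows of <paramref name="input" />.</returns>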
<remarks>
Returns an <see cref="T:Microsoft.ML.IDataView" /> with the first <paramref name="count" /> rows from <paramref name="input" />.
</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[TakeRows](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs)]
]]></format>
</example>
</Docs>
</Member>
<Member MemberName="TrainTestSplit">
<MemberSignature Language="C#" Value="public Microsoft.ML.DataOperationsCatalog.TrainTestData TrainTestSplit (Microsoft.ML.IDataView data, double testFraction = 0.1, string samplingKeyColumnName = default, int? seed = default);" />
<MemberSignature Language="ILAsm" Value=".method public hidebysig instance valuetype Microsoft.ML.DataOperationsCatalog/TrainTestData TrainTestSplit(class Microsoft.ML.IDataView data, float64 testFraction, string samplingKeyColumnName, valuetype System.Nullable`1&lt;int32&gt; seed) cil managed" />
<MemberSignature Language="DocId" Value="M:Microsoft.ML.DataOperationsCatalog.TrainTestSplit(Microsoft.ML.IDataView,System.Double,System.String,System.Nullable{System.Int32})" />
<MemberSignature Language="VB.NET" Value="Public Function TrainTestSplit (data As IDataView, Optional testFraction As Double = 0.1, Optional samplingKeyColumnName As String = Nothing, Optional seed As Nullable(Of Integer) = Nothing) As DataOperationsCatalog.TrainTestData" />
<MemberSignature Language="F#" Value="member this.TrainTestSplit : Microsoft.ML.IDataView * double * string * Nullable&lt;int&gt; -> Microsoft.ML.DataOperationsCatalog.TrainTestData" Usage="dataOperationsCatalog.TrainTestSplit (data, testFraction, samplingKeyColumnName, seed)" />
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.ML.Data</AssemblyName>
<AssemblyVersion>1.0.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.ML.DataOperationsCatalog+TrainTestData</ReturnType>
</ReturnValue>
<Parameters>
<Parameter Name="data" Type="Microsoft.ML.IDataView" />
<Parameter Name="testFraction" Type="System.Double" />
<Parameter Name="samplingKeyColumnName" Type="System.String" />
<Parameter Name="seed" Type="System.Nullable&lt;System.Int32&gt;" />
</Parameters>
<Docs>
<param name="data">The dataset to split.</param>
<param name="testFraction">The fraction of data to go into the test set.</param>
<param name="samplingKeyColumnName">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumnName" />,
they are guaranteed to appear in the same subset (train or test). This can be used to ensure no label leakage from the train to the test set.
Note that when performing a Ranking Experiment, the <paramref name="samplingKeyColumnName" /> must be the GroupId column.
If <see langword="null" />, no row grouping will be performed.</param>
<param name="seed">Seed for the random number generator used to select rows for the train-test split.</param>
<summary>
Split the dataset into the train set and test set according to the given fraction.
Respects the <paramref name="samplingKeyColumnName" /> if provided.
</summary>
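<returns>A <see cref="T:Microsoft.ML.DataOperationsCatalog.TrainTestData" /> holding the train set and the test set produced by the split.</returns>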
<remarks>To be added.</remarks>
<example>
<format type="text/markdown"><![CDATA[
[!code-csharp[TrainTestSplit](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs)]
]]></format>
</example>
</Docs>
</Member>
</Members>
</Type>