/
VBufferUtils.cs
1376 lines (1283 loc) · 59.1 KB
/
VBufferUtils.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
namespace Microsoft.ML.Internal.Utilities
{
// REVIEW: Consider automatic densification in some of the operations, where appropriate.
// REVIEW: Once we do the conversions from Vector/WritableVector, review names of methods,
// parameters, parameter order, etc.
/// <summary>
/// Convenience utilities for vector operations on <see cref="VBuffer{T}"/>.
/// </summary>
[BestFriend]
internal static class VBufferUtils
{
private const float SparsityThreshold = 0.25f;
public static bool HasNaNs(in VBuffer<Single> buffer)
{
var values = buffer.GetValues();
for (int i = 0; i < values.Length; i++)
{
if (Single.IsNaN(values[i]))
return true;
}
return false;
}
public static bool HasNaNs(in VBuffer<Double> buffer)
{
var values = buffer.GetValues();
for (int i = 0; i < values.Length; i++)
{
if (Double.IsNaN(values[i]))
return true;
}
return false;
}
public static bool HasNonFinite(in VBuffer<Single> buffer)
{
var values = buffer.GetValues();
for (int i = 0; i < values.Length; i++)
{
if (!FloatUtils.IsFinite(values[i]))
return true;
}
return false;
}
public static bool HasNonFinite(in VBuffer<Double> buffer)
{
var values = buffer.GetValues();
for (int i = 0; i < values.Length; i++)
{
if (!FloatUtils.IsFinite(values[i]))
return true;
}
return false;
}
public static VBuffer<T> CreateEmpty<T>(int length)
{
Contracts.CheckParam(length >= 0, nameof(length));
return new VBuffer<T>(length, 0, null, null);
}
public static VBuffer<T> CreateDense<T>(int length)
{
Contracts.CheckParam(length >= 0, nameof(length));
return new VBuffer<T>(length, new T[length]);
}
/// <summary>
/// Applies <paramref name="visitor"/> to every explicitly defined element of the vector,
/// in order of index.
/// </summary>
public static void ForEachDefined<T>(in VBuffer<T> a, Action<int, T> visitor)
{
Contracts.CheckValue(visitor, nameof(visitor));
// REVIEW: This is analogous to an old Vector method, but is there
// any real reason to have it given that we have the Items extension method?
var aValues = a.GetValues();
if (a.IsDense)
{
for (int i = 0; i < aValues.Length; i++)
visitor(i, aValues[i]);
}
else
{
var aIndices = a.GetIndices();
for (int i = 0; i < aValues.Length; i++)
visitor(aIndices[i], aValues[i]);
}
}
/// <summary>
/// Applies the <paramref name="visitor "/>to each corresponding pair of elements
/// where the item is emplicitly defined in the vector. By explicitly defined,
/// we mean that for a given index <c>i</c>, both vectors have an entry in
/// <see cref="VBuffer{T}.GetValues"/> corresponding to that index.
/// </summary>
/// <param name="a">The first vector</param>
/// <param name="b">The second vector</param>
/// <param name="visitor">Delegate to apply to each pair of non-zero values.
/// This is passed the index, and two values</param>
public static void ForEachBothDefined<T>(in VBuffer<T> a, in VBuffer<T> b, Action<int, T, T> visitor)
{
Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality.");
Contracts.CheckValue(visitor, nameof(visitor));
var aValues = a.GetValues();
var bValues = b.GetValues();
if (a.IsDense && b.IsDense)
{
for (int i = 0; i < a.Length; i++)
visitor(i, aValues[i], bValues[i]);
}
else if (b.IsDense)
{
var aIndices = a.GetIndices();
for (int i = 0; i < aValues.Length; i++)
visitor(aIndices[i], aValues[i], bValues[aIndices[i]]);
}
else if (a.IsDense)
{
var bIndices = b.GetIndices();
for (int i = 0; i < bValues.Length; i++)
visitor(bIndices[i], aValues[bIndices[i]], bValues[i]);
}
else
{
// Both sparse.
int aI = 0;
int bI = 0;
var aIndices = a.GetIndices();
var bIndices = b.GetIndices();
while (aI < aValues.Length && bI < bValues.Length)
{
int i = aIndices[aI];
int j = bIndices[bI];
if (i == j)
visitor(i, aValues[aI++], bValues[bI++]);
else if (i < j)
aI++;
else
bI++;
}
}
}
/// <summary>
/// Applies the ParallelVisitor to each corresponding pair of elements where at least one is non-zero, in order of index.
/// </summary>
/// <param name="a">a vector</param>
/// <param name="b">another vector</param>
/// <param name="visitor">Function to apply to each pair of non-zero values - passed the index, and two values</param>
public static void ForEachEitherDefined<T>(in VBuffer<T> a, in VBuffer<T> b, Action<int, T, T> visitor)
{
Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality.");
Contracts.CheckValue(visitor, nameof(visitor));
var aValues = a.GetValues();
var bValues = b.GetValues();
if (a.IsDense && b.IsDense)
{
for (int i = 0; i < a.Length; ++i)
visitor(i, aValues[i], bValues[i]);
}
else if (b.IsDense)
{
int aI = 0;
var aIndices = a.GetIndices();
for (int i = 0; i < b.Length; i++)
{
T aVal = (aI < aValues.Length && i == aIndices[aI]) ? aValues[aI++] : default(T);
visitor(i, aVal, bValues[i]);
}
}
else if (a.IsDense)
{
int bI = 0;
var bIndices = b.GetIndices();
for (int i = 0; i < a.Length; i++)
{
T bVal = (bI < bValues.Length && i == bIndices[bI]) ? bValues[bI++] : default(T);
visitor(i, aValues[i], bVal);
}
}
else
{
// Both sparse
int aI = 0;
int bI = 0;
var aIndices = a.GetIndices();
var bIndices = b.GetIndices();
while (aI < aValues.Length && bI < bValues.Length)
{
int diff = aIndices[aI] - bIndices[bI];
if (diff == 0)
{
visitor(bIndices[bI], aValues[aI], bValues[bI]);
aI++;
bI++;
}
else if (diff < 0)
{
visitor(aIndices[aI], aValues[aI], default(T));
aI++;
}
else
{
visitor(bIndices[bI], default(T), bValues[bI]);
bI++;
}
}
while (aI < aValues.Length)
{
visitor(aIndices[aI], aValues[aI], default(T));
aI++;
}
while (bI < bValues.Length)
{
visitor(bIndices[bI], default(T), bValues[bI]);
bI++;
}
}
}
/// <summary>
/// Sets all values in the vector to the default value for the type, without changing the
/// density or index structure of the input array. That is to say, the count of the input
/// vector will be the same afterwards as it was before.
/// </summary>
public static void Clear<T>(ref VBuffer<T> dst)
{
var editor = VBufferEditor.CreateFromBuffer(ref dst);
editor.Values.Clear();
}
// REVIEW: Look into removing slot in this and other manipulators, so that we
// could potentially have something around, say, skipping default entries.
/// <summary>
/// A delegate for functions that can change a value.
/// </summary>
/// <param name="slot">Index of entry</param>
/// <param name="value">Value to change</param>
public delegate void SlotValueManipulator<T>(int slot, ref T value);
/// <summary>
/// A predicate on some sort of value.
/// </summary>
/// <param name="src">The value to test</param>
/// <returns>The result of some sort of test from that value</returns>
public delegate bool ValuePredicate<T>(ref T src);
/// <summary>
/// Applies the <paramref name="manip"/> to every explicitly defined
/// element of the vector.
/// </summary>
public static void Apply<T>(ref VBuffer<T> dst, SlotValueManipulator<T> manip)
{
Contracts.CheckValue(manip, nameof(manip));
var editor = VBufferEditor.CreateFromBuffer(ref dst);
if (dst.IsDense)
{
for (int i = 0; i < editor.Values.Length; i++)
manip(i, ref editor.Values[i]);
}
else
{
var dstIndices = dst.GetIndices();
for (int i = 0; i < editor.Values.Length; i++)
manip(dstIndices[i], ref editor.Values[i]);
}
}
/// <summary>
/// Applies some function on a value at a particular slot value, changing that slot value.
/// This function will, wherever possible, not change the structure of <paramref name="dst"/>.
/// If the vector is sparse, and the corresponding slot is not explicitly represented,
/// then this can involve memory copying and possibly memory reallocation on <paramref name="dst"/>.
/// However, if the item is explicitly represented, even if the item is set to the default
/// value of <typeparamref name="T"/> it will not change the structure of <paramref name="dst"/>,
/// in terms of sparsifying a dense array, or dropping indices.
/// </summary>
/// <param name="dst">The vector to modify</param>
/// <param name="slot">The slot of the vector to modify</param>
/// <param name="manip">The manipulation function</param>
/// <param name="pred">A predicate that returns true if we should skip insertion of a value into
/// sparse vector if it was default. If the predicate is null, we insert any non-default.</param>
public static void ApplyAt<T>(ref VBuffer<T> dst, int slot, SlotValueManipulator<T> manip, ValuePredicate<T> pred = null)
{
Contracts.CheckParam(0 <= slot && slot < dst.Length, nameof(slot));
Contracts.CheckValue(manip, nameof(manip));
Contracts.CheckValueOrNull(pred);
var editor = VBufferEditor.CreateFromBuffer(ref dst);
int dstValuesCount = editor.Values.Length;
if (dst.IsDense)
{
// The vector is dense, so we can just do a direct access.
manip(slot, ref editor.Values[slot]);
return;
}
int idx = 0;
if (dstValuesCount > 0 && Utils.TryFindIndexSorted(editor.Indices, 0, dstValuesCount, slot, out idx))
{
// Vector is sparse, but the item exists so we can access it.
manip(slot, ref editor.Values[idx]);
return;
}
// The vector is sparse and there is no corresponding item, yet.
T value = default(T);
manip(slot, ref value);
// If this item is not defined and it's default, no need to proceed of course.
pred = pred ?? ((ref T val) => Comparer<T>.Default.Compare(val, default(T)) == 0);
if (pred(ref value))
return;
// We have to insert this value, somehow.
// There is a modest special case where there is exactly one free slot
// we are modifying in the sparse vector, in which case the vector becomes
// dense. Then there is no need to do anything with indices.
bool needIndices = dstValuesCount + 1 < dst.Length;
editor = VBufferEditor.Create(ref dst, dst.Length, dstValuesCount + 1, keepOldOnResize: true);
if (idx != dstValuesCount)
{
// We have to do some sort of shift copy.
int sliceLength = dstValuesCount - idx;
if (needIndices)
editor.Indices.Slice(idx, sliceLength).CopyTo(editor.Indices.Slice(idx + 1));
editor.Values.Slice(idx, sliceLength).CopyTo(editor.Values.Slice(idx + 1));
}
if (needIndices)
editor.Indices[idx] = slot;
editor.Values[idx] = value;
dst = editor.Commit();
}
/// <summary>
/// Given a vector, turns it into an equivalent dense representation.
/// </summary>
public static void Densify<T>(ref VBuffer<T> dst)
{
if (dst.IsDense)
return;
var indices = dst.GetIndices();
var values = dst.GetValues();
var editor = VBufferEditor.Create(
ref dst,
dst.Length);
if (!editor.CreatedNewValues)
{
// Densify in place.
for (int i = values.Length; --i >= 0;)
{
Contracts.Assert(i <= indices[i]);
editor.Values[indices[i]] = values[i];
}
if (values.Length == 0)
editor.Values.Clear();
else
{
int min = 0;
for (int ii = 0; ii < values.Length; ++ii)
{
editor.Values.Slice(min, indices[ii] - min).Clear();
min = indices[ii] + 1;
}
editor.Values.Slice(min, dst.Length - min).Clear();
}
}
else
{
// createdNewValues is true, keepOldOnResize is false, so Values is already cleared
for (int i = 0; i < values.Length; ++i)
editor.Values[indices[i]] = values[i];
}
dst = editor.Commit();
}
/// <summary>
/// Given a vector, ensure that the first <paramref name="denseCount"/> slots are explicitly
/// represented.
/// </summary>
public static void DensifyFirst<T>(ref VBuffer<T> dst, int denseCount)
{
Contracts.Check(0 <= denseCount && denseCount <= dst.Length);
var dstValues = dst.GetValues();
var dstIndices = dst.GetIndices();
if (dst.IsDense || denseCount == 0 || (dstValues.Length >= denseCount && dstIndices[denseCount - 1] == denseCount - 1))
return;
if (denseCount == dst.Length)
{
Densify(ref dst);
return;
}
// Densify the first denseCount entries.
if (dstIndices.IsEmpty)
{
// no previous values
var newIndicesEditor = VBufferEditor.Create(ref dst, dst.Length, denseCount);
Utils.FillIdentity(newIndicesEditor.Indices, denseCount);
newIndicesEditor.Values.Clear();
dst = newIndicesEditor.Commit();
return;
}
int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount);
Contracts.Assert(lim < denseCount);
int newLen = dstValues.Length + denseCount - lim;
if (newLen == dst.Length)
{
Densify(ref dst);
return;
}
var editor = VBufferEditor.Create(ref dst, dst.Length, newLen, keepOldOnResize: true);
int sliceLength = dstValues.Length - lim;
editor.Values.Slice(lim, sliceLength).CopyTo(editor.Values.Slice(denseCount));
editor.Indices.Slice(lim, sliceLength).CopyTo(editor.Indices.Slice(denseCount));
int i = lim - 1;
for (int ii = denseCount; --ii >= 0;)
{
editor.Values[ii] = i >= 0 && dstIndices[i] == ii ? dstValues[i--] : default(T);
editor.Indices[ii] = ii;
}
dst = editor.Commit();
}
/// <summary>
/// Creates a maybe sparse copy of a VBuffer.
/// Whether the created copy is sparse or not is determined by the proportion of non-default entries compared to the sparsity parameter.
/// </summary>
public static void CreateMaybeSparseCopy<T>(in VBuffer<T> src, ref VBuffer<T> dst, InPredicate<T> isDefaultPredicate, float sparsityThreshold = SparsityThreshold)
{
Contracts.CheckParam(0 < sparsityThreshold && sparsityThreshold < 1, nameof(sparsityThreshold));
if (!src.IsDense || src.Length < 20)
{
src.CopyTo(ref dst);
return;
}
int sparseCount = 0;
var sparseCountThreshold = (int)(src.Length * sparsityThreshold);
var srcValues = src.GetValues();
for (int i = 0; i < src.Length; i++)
{
if (!isDefaultPredicate(in srcValues[i]))
sparseCount++;
if (sparseCount > sparseCountThreshold)
{
src.CopyTo(ref dst);
return;
}
}
var editor = VBufferEditor.Create(ref dst, src.Length, sparseCount);
if (sparseCount > 0)
{
int j = 0;
for (int i = 0; i < src.Length; i++)
{
if (!isDefaultPredicate(in srcValues[i]))
{
Contracts.Assert(j < sparseCount);
editor.Indices[j] = i;
editor.Values[j] = srcValues[i];
j++;
}
}
Contracts.Assert(j == sparseCount);
}
dst = editor.Commit();
}
/// <summary>
/// A delegate for functions that access an index and two corresponding
/// values, possibly changing one of them.
/// </summary>
/// <param name="slot">Slot index of the entry.</param>
/// <param name="src">Value from first vector.</param>
/// <param name="dst">Value from second vector, which may be manipulated.</param>
public delegate void PairManipulator<TSrc, TDst>(int slot, TSrc src, ref TDst dst);
/// <summary>
/// A delegate for functions that access an index and two corresponding
/// values, stores the result in another vector.
/// </summary>
/// <param name="slot">Slot index of the entry.</param>
/// <param name="src">Value from first vector.</param>
/// <param name="dst">Value from second vector.</param>
/// <param name="res">The value to store the result.</param>
public delegate void PairManipulatorCopy<TSrc, TDst>(int slot, TSrc src, TDst dst, ref TDst res);
/// <summary>
/// Applies the <see cref="PairManipulator{TSrc,TDst}"/> to each pair of elements
/// where <paramref name="src"/> is defined, in order of index. If there is
/// some value at an index in <paramref name="dst"/> that is not defined in
/// <paramref name="src"/>, that item remains without any further modification.
/// If either of the vectors are dense, the resulting <paramref name="dst"/>
/// will be dense. Otherwise, if both are sparse, the output will be sparse iff
/// there is any slot that is not explicitly represented in either vector.
/// </summary>
/// <param name="src">Argument vector, whose elements are only read</param>
/// <param name="dst">Argument vector, that could change</param>
/// <param name="manip">Function to apply to each pair of elements</param>
public static void ApplyWith<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, PairManipulator<TSrc, TDst> manip)
{
ApplyWithCore(in src, ref dst, manip, outer: false);
}
/// <summary>
/// Applies the <see cref="PairManipulator{TSrc,TDst}"/> to each pair of elements
/// where <paramref name="src"/> is defined, in order of index. It stores the result
/// in another vector. If there is some value at an index in <paramref name="dst"/>
/// that is not defined in <paramref name="src"/>, that slot value is copied to the
/// corresponding slot in the result vector without any further modification.
/// If either of the vectors are dense, the resulting <paramref name="res"/>
/// will be dense. Otherwise, if both are sparse, the output will be sparse iff
/// there is any slot that is not explicitly represented in either vector.
/// </summary>
/// <param name="src">Argument vector, whose elements are only read</param>
/// <param name="dst">Argument vector, whose elements are read in most cases. But in some
/// cases <paramref name="dst"/> may be densified.</param>
/// <param name="res">Result vector</param>
/// <param name="manip">Function to apply to each pair of elements</param>
public static void ApplyWithCopy<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, ref VBuffer<TDst> res, PairManipulatorCopy<TSrc, TDst> manip)
{
ApplyWithCoreCopy(in src, ref dst, ref res, manip, outer: false);
}
/// <summary>
/// Applies the <see cref="PairManipulator{TSrc,TDst}"/> to each pair of elements
/// where either <paramref name="src"/> or <paramref name="dst"/>, has an element
/// defined at that index. If either of the vectors are dense, the resulting
/// <paramref name="dst"/> will be dense. Otherwise, if both are sparse, the output
/// will be sparse iff there is any slot that is not explicitly represented in
/// either vector.
/// </summary>
/// <param name="src">Argument vector, whose elements are only read</param>
/// <param name="dst">Argument vector, that could change</param>
/// <param name="manip">Function to apply to each pair of elements</param>
public static void ApplyWithEitherDefined<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, PairManipulator<TSrc, TDst> manip)
{
ApplyWithCore(in src, ref dst, manip, outer: true);
}
/// <summary>
/// Applies the <see cref="PairManipulator{TSrc,TDst}"/> to each pair of elements
/// where either <paramref name="src"/> or <paramref name="dst"/>, has an element
/// defined at that index. It stores the result in another vector <paramref name="res"/>.
/// If either of the vectors are dense, the resulting <paramref name="res"/>
/// will be dense. Otherwise, if both are sparse, the output will be sparse iff
/// there is any slot that is not explicitly represented in either vector.
/// </summary>
/// <param name="src">Argument vector, whose elements are only read</param>
/// <param name="dst">Argument vector, whose elements are read in most cases. But in some
/// cases <paramref name="dst"/> may be densified.</param>
/// <param name="res">Result vector</param>
/// <param name="manip">Function to apply to each pair of elements</param>
public static void ApplyWithEitherDefinedCopy<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, ref VBuffer<TDst> res, PairManipulatorCopy<TSrc, TDst> manip)
{
ApplyWithCoreCopy(in src, ref dst, ref res, manip, outer: true);
}
/// <summary>
/// The actual implementation of <see cref="ApplyWith"/> and
/// <see cref="ApplyWithEitherDefined{TSrc,TDst}"/>, that has internal branches on the implementation
/// where necessary depending on whether this is an inner or outer join of the
/// indices of <paramref name="src"/> on <paramref name="dst"/>.
/// </summary>
private static void ApplyWithCore<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, PairManipulator<TSrc, TDst> manip, bool outer)
{
Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality.");
Contracts.CheckValue(manip, nameof(manip));
// We handle all of the permutations of the density/sparsity of src/dst through
// special casing below. Each subcase in turn handles appropriately the treatment
// of the "outer" parameter. There are nine, top level cases. Each case is
// considered in this order.
// 1. srcValues.Length == 0.
// 2. src.Dense.
// 3. dst.Dense.
// 4. dstValues.Length == 0.
// Beyond this point the cases can assume both src/dst are sparse non-empty vectors.
// We then calculate the size of the resulting output array, then use that to fall
// through to more special cases.
// 5. The union will result in dst becoming dense. So just densify it, then recurse.
// 6. Neither src nor dst's indices is a subset of the other.
// 7. The two sets of indices are identical.
// 8. src's indices are a subset of dst's.
// 9. dst's indices are a subset of src's.
// Each one of these subcases also separately handles the "outer" parameter, if
// necessary. It is unnecessary if src's indices form a superset (proper or improper)
// of dst's indices. So, for example, cases 2, 4, 7, 9 do not require special handling.
// Case 5 does not require special handling, because it falls through to other cases
// that do the special handling for them.
var srcValues = src.GetValues();
var dstValues = dst.GetValues();
var dstIndices = dst.GetIndices();
var editor = VBufferEditor.CreateFromBuffer(ref dst);
if (srcValues.Length == 0)
{
// Major case 1, with srcValues.Length == 0.
if (!outer)
return;
if (dst.IsDense)
{
for (int i = 0; i < dst.Length; i++)
manip(i, default(TSrc), ref editor.Values[i]);
}
else
{
for (int i = 0; i < dstValues.Length; i++)
manip(dstIndices[i], default(TSrc), ref editor.Values[i]);
}
return;
}
if (src.IsDense)
{
// Major case 2, with src.Dense.
if (!dst.IsDense)
{
Densify(ref dst);
editor = VBufferEditor.CreateFromBuffer(ref dst);
}
// Both are now dense. Both cases of outer are covered.
for (int i = 0; i < srcValues.Length; i++)
manip(i, srcValues[i], ref editor.Values[i]);
return;
}
var srcIndices = src.GetIndices();
if (dst.IsDense)
{
// Major case 3, with dst.Dense. Note that !src.Dense.
if (outer)
{
int sI = 0;
int sIndex = srcIndices[sI];
for (int i = 0; i < dst.Length; ++i)
{
if (i == sIndex)
{
manip(i, srcValues[sI], ref editor.Values[i]);
sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI];
}
else
manip(i, default(TSrc), ref editor.Values[i]);
}
}
else
{
for (int i = 0; i < srcValues.Length; i++)
manip(srcIndices[i], srcValues[i], ref editor.Values[srcIndices[i]]);
}
return;
}
if (dstValues.Length == 0)
{
// Major case 4, with dst empty. Note that !src.Dense.
// Neither is dense, and dst is empty. Both cases of outer are covered.
editor = VBufferEditor.Create(ref dst,
src.Length,
srcValues.Length,
maxValuesCapacity: src.Length);
editor.Values.Clear();
for (int i = 0; i < srcValues.Length; i++)
manip(editor.Indices[i] = srcIndices[i], srcValues[i], ref editor.Values[i]);
dst = editor.Commit();
return;
}
// Beyond this point, we can assume both a and b are sparse with positive count.
int dI = 0;
int newCount = dstValues.Length;
// Try to find each src index in dst indices, counting how many more we'll add.
for (int sI = 0; sI < srcValues.Length; sI++)
{
int sIndex = srcIndices[sI];
while (dI < dstValues.Length && dstIndices[dI] < sIndex)
dI++;
if (dI == dstValues.Length)
{
newCount += srcValues.Length - sI;
break;
}
if (dstIndices[dI] == sIndex)
dI++;
else
newCount++;
}
Contracts.Assert(newCount > 0);
Contracts.Assert(0 < srcValues.Length && srcValues.Length <= newCount);
Contracts.Assert(0 < dstValues.Length && dstValues.Length <= newCount);
// REVIEW: Densify above a certain threshold, not just if
// the output will necessarily become dense? But then we get into
// the dubious business of trying to pick the "right" densification
// threshold.
if (newCount == dst.Length)
{
// Major case 5, dst will become dense through the application of
// this. Just recurse one level so one of the initial conditions
// can catch it, specifically, the major case 3.
// This is unnecessary -- falling through to the sparse code will
// actually handle this case just fine -- but it is more efficient.
Densify(ref dst);
ApplyWithCore(in src, ref dst, manip, outer);
return;
}
if (newCount != srcValues.Length && newCount != dstValues.Length)
{
// Major case 6, neither set of indices is a subset of the other.
// This subcase used to fall through to another subcase, but this
// proved to be inefficient so we go to the little bit of extra work
// to handle it here.
editor = VBufferEditor.Create(ref dst,
src.Length,
newCount,
maxValuesCapacity: dst.Length);
var indices = editor.Indices;
var values = editor.Values;
int sI = srcValues.Length - 1;
dI = dstValues.Length - 1;
int sIndex = srcIndices[sI];
int dIndex = dstIndices[dI];
// Go from the end, so that even if we're writing over dst's vectors in
// place, we do not corrupt the data as we are reorganizing it.
for (int i = newCount; --i >= 0;)
{
if (sIndex < dIndex)
{
indices[i] = dIndex;
values[i] = dstValues[dI];
if (outer)
manip(dIndex, default(TSrc), ref values[i]);
dIndex = --dI >= 0 ? dstIndices[dI] : -1;
}
else if (sIndex > dIndex)
{
indices[i] = sIndex;
values[i] = default(TDst);
manip(sIndex, srcValues[sI], ref values[i]);
sIndex = --sI >= 0 ? srcIndices[sI] : -1;
}
else
{
// We should not have run past the beginning, due to invariants.
Contracts.Assert(sIndex >= 0);
Contracts.Assert(sIndex == dIndex);
indices[i] = dIndex;
values[i] = dstValues[dI];
manip(sIndex, srcValues[sI], ref values[i]);
sIndex = --sI >= 0 ? srcIndices[sI] : -1;
dIndex = --dI >= 0 ? dstIndices[dI] : -1;
}
}
dst = editor.Commit();
return;
}
if (newCount == dstValues.Length)
{
if (newCount == srcValues.Length)
{
// Major case 7, the set of indices is the same for src and dst.
Contracts.Assert(srcValues.Length == dstValues.Length);
for (int i = 0; i < srcValues.Length; i++)
{
Contracts.Assert(srcIndices[i] == dstIndices[i]);
manip(srcIndices[i], srcValues[i], ref editor.Values[i]);
}
return;
}
// Major case 8, the indices of src must be a subset of dst's indices.
Contracts.Assert(newCount > srcValues.Length);
dI = 0;
if (outer)
{
int sI = 0;
int sIndex = srcIndices[sI];
for (int i = 0; i < dstValues.Length; ++i)
{
if (dstIndices[i] == sIndex)
{
manip(sIndex, srcValues[sI], ref editor.Values[i]);
sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI];
}
else
manip(dstIndices[i], default(TSrc), ref editor.Values[i]);
}
}
else
{
for (int sI = 0; sI < srcValues.Length; sI++)
{
int sIndex = srcIndices[sI];
while (dstIndices[dI] < sIndex)
dI++;
Contracts.Assert(dstIndices[dI] == sIndex);
manip(sIndex, srcValues[sI], ref editor.Values[dI++]);
}
}
return;
}
if (newCount == srcValues.Length)
{
// Major case 9, the indices of dst must be a subset of src's indices. Both cases of outer are covered.
// First do a "quasi" densification of dst, by making the indices
// of dst correspond to those in src.
editor = VBufferEditor.Create(ref dst, newCount, dstValues.Length);
int sI = 0;
for (dI = 0; dI < dstValues.Length; ++dI)
{
int bIndex = dstIndices[dI];
while (srcIndices[sI] < bIndex)
sI++;
Contracts.Assert(srcIndices[sI] == bIndex);
editor.Indices[dI] = sI++;
}
dst = editor.Commit();
Densify(ref dst);
editor = VBufferEditor.Create(ref dst,
src.Length,
newCount,
maxValuesCapacity: src.Length);
srcIndices.CopyTo(editor.Indices);
for (sI = 0; sI < srcValues.Length; sI++)
manip(srcIndices[sI], srcValues[sI], ref editor.Values[sI]);
dst = editor.Commit();
return;
}
Contracts.Assert(false);
}
/// <summary>
/// The actual implementation of <see cref="ApplyWithCopy{TSrc,TDst}"/> and
/// <see cref="ApplyWithEitherDefinedCopy{TSrc,TDst}"/>, that has internal branches on the implementation
/// where necessary depending on whether this is an inner or outer join of the
/// indices of <paramref name="src"/> on <paramref name="dst"/>.
/// </summary>
private static void ApplyWithCoreCopy<TSrc, TDst>(in VBuffer<TSrc> src, ref VBuffer<TDst> dst, ref VBuffer<TDst> res, PairManipulatorCopy<TSrc, TDst> manip, bool outer)
{
Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality.");
Contracts.CheckValue(manip, nameof(manip));
int length = src.Length;
var srcValues = src.GetValues();
var dstValues = dst.GetValues();
if (dstValues.Length == 0)
{
if (srcValues.Length == 0)
{
Resize(ref res, length, 0);
}
else if (src.IsDense)
{
Contracts.Assert(srcValues.Length == src.Length);
var editor = VBufferEditor.Create(ref res, length);
for (int i = 0; i < length; i++)
manip(i, srcValues[i], default(TDst), ref editor.Values[i]);
res = editor.Commit();
}
else
{
// src is non-empty sparse.
int count = srcValues.Length;
Contracts.Assert(0 < count && count < length);
var editor = VBufferEditor.Create(ref res, length, count);
var srcIndices = src.GetIndices();
srcIndices.CopyTo(editor.Indices);
for (int ii = 0; ii < count; ii++)
{
int i = srcIndices[ii];
editor.Indices[ii] = i;
manip(i, srcValues[ii], default(TDst), ref editor.Values[ii]);
}
res = editor.Commit();
}
}
else if (dst.IsDense)
{
var editor = VBufferEditor.Create(ref res, length);
if (srcValues.Length == 0)
{
if (outer)
{
// Apply manip to all slots, as all slots of dst are defined.
for (int j = 0; j < length; j++)
manip(j, default(TSrc), dstValues[j], ref editor.Values[j]);
}
else
{
// Copy only. No slot of src is defined.
for (int j = 0; j < length; j++)
editor.Values[j] = dstValues[j];
}
res = editor.Commit();
}
else if (src.IsDense)
{
Contracts.Assert(srcValues.Length == src.Length);
for (int i = 0; i < length; i++)
manip(i, srcValues[i], dstValues[i], ref editor.Values[i]);
res = editor.Commit();
}
else
{
// src is sparse and non-empty.
int count = srcValues.Length;
Contracts.Assert(0 < count && count < length);
int ii = 0;
var srcIndices = src.GetIndices();
int i = srcIndices[ii];
if (outer)
{
// All slots of dst are defined. Always apply manip.
for (int j = 0; j < length; j++)
{
if (j == i)
{
manip(j, srcValues[ii], dstValues[j], ref editor.Values[j]);
i = ++ii == count ? length : srcIndices[ii];
}
else
manip(j, default(TSrc), dstValues[j], ref editor.Values[j]);
}
}
else
{
// Only apply manip for those slots where src is defined. Otherwise just copy.
for (int j = 0; j < length; j++)
{
if (j == i)
{
manip(j, srcValues[ii], dstValues[j], ref editor.Values[j]);
i = ++ii == count ? length : srcIndices[ii];
}
else
editor.Values[j] = dstValues[j];
}
}
res = editor.Commit();
}
}
else
{
// dst is non-empty sparse
int dstCount = dstValues.Length;
var dstIndices = dst.GetIndices();
Contracts.Assert(dstCount > 0);
if (srcValues.Length == 0)
{
var editor = VBufferEditor.Create(ref res, length, dstCount);
if (outer)
{
for (int jj = 0; jj < dstCount; jj++)
{
int j = dstIndices[jj];
editor.Indices[jj] = j;
manip(j, default(TSrc), dstValues[jj], ref editor.Values[jj]);
}
}