/
enumerable.rb
645 lines (544 loc) · 14.8 KB
/
enumerable.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
require 'epitools/core_ext/array'
module Enumerable
include Array::ToCSV
#
# 'true' if the Enumerable has no elements
#
def blank?
not any?
end
#
# `.all` is more fun to type than `.to_a`
#
alias_method :all, :to_a
#
# `includes?` is gramatically correct.
#
alias_method :includes?, :include?
#
# Skip the first n elements and return an Enumerator for the rest, or pass them
# in succession to the block, if given. This is like "drop", but returns an enumerator
# instead of converting the whole thing to an array.
#
def skip(n)
if block_given?
each do |x|
if n > 0
n -= 1
else
yield x
end
end
else
to_enum(:skip, n)
end
end
#
# Convert the Enumerable to an array and return a reversed copy
#
unless defined? reverse
def reverse
to_a.reverse
end
end
#
# Iterate over the Enumerable backwards (after converting it to an array)
#
unless defined? reverse_each
def reverse_each
to_a.reverse_each
end
end
#
# Split this enumerable into chunks, given some boundary condition. (Returns an array of arrays.)
#
# Options:
# :include_boundary => true #=> include the element that you're splitting at in the results
# (default: false)
# :after => true #=> split after the matched element (only has an effect when used with :include_boundary)
# (default: false)
# :once => flase #=> only perform one split (default: false)
#
# Examples:
# [1,2,3,4,5].split{ |e| e == 3 }
# #=> [ [1,2], [4,5] ]
#
# "hello\n\nthere\n".each_line.split_at("\n").to_a
# #=> [ ["hello\n"], ["there\n"] ]
#
# [1,2,3,4,5].split(:include_boundary=>true) { |e| e == 3 }
# #=> [ [1,2], [3,4,5] ]
#
# chapters = File.read("ebook.txt").split(/Chapter \d+/, :include_boundary=>true)
# #=> [ ["Chapter 1", ...], ["Chapter 2", ...], etc. ]
#
def split_at(matcher=nil, options={}, &block)
include_boundary = options[:include_boundary] || false
if matcher.nil?
boundary_test_proc = block
else
if matcher.is_a? Regexp
boundary_test_proc = proc { |element| element =~ matcher }
else
boundary_test_proc = proc { |element| element == matcher }
end
end
Enumerator.new do |yielder|
current_chunk = []
splits = 0
max_splits = options[:once] == true ? 1 : options[:max_splits]
each do |e|
if boundary_test_proc.call(e) and (max_splits == nil or splits < max_splits)
if current_chunk.empty? and not include_boundary
next # hit 2 boundaries in a row... just keep moving, people!
end
if options[:after]
# split after boundary
current_chunk << e if include_boundary # include the boundary, if necessary
yielder << current_chunk # shift everything after the boundary into the resultset
current_chunk = [] # start a new result
else
# split before boundary
yielder << current_chunk # shift before the boundary into the resultset
current_chunk = [] # start a new result
current_chunk << e if include_boundary # include the boundary, if necessary
end
splits += 1
else
current_chunk << e
end
end
yielder << current_chunk if current_chunk.any?
end
end
#
# Split the array into chunks, cutting between the matched element and the next element.
#
# Example:
# [1,2,3,4].split_after{|e| e == 3 } #=> [ [1,2,3], [4] ]
#
def split_after(matcher=nil, options={}, &block)
options[:after] ||= true
options[:include_boundary] ||= true
split_at(matcher, options, &block)
end
#
# Split the array into chunks, cutting before each matched element.
#
# Example:
# [1,2,3,4].split_before{|e| e == 3 } #=> [ [1,2], [3,4] ]
#
def split_before(matcher=nil, options={}, &block)
options[:include_boundary] ||= true
split_at(matcher, options, &block)
end
#
# Split the array into chunks, cutting between two elements.
#
# Example:
# [1,1,2,2].split_between{|a,b| a != b } #=> [ [1,1], [2,2] ]
#
def split_between(&block)
Enumerator.new do |yielder|
current = []
last = nil
each_cons(2) do |a,b|
current << a
if yield(a,b)
yielder << current
current = []
end
last = b
end
current << last unless last.nil?
yielder << current
end
end
alias_method :cut_between, :split_between
#
# Map elements of this Enumerable in parallel using a pool full of Threads
#
# eg: repos.parallel_map { |repo| system "git pull #{repo}" }
#
def parallel_map(num_workers=8, &block)
require 'thread'
queue = Queue.new
each { |e| queue.push e }
Enumerator.new(queue.size) do |y|
workers = (0...num_workers).map do
Thread.new do
begin
while e = queue.pop(true)
y << block.call(e)
end
rescue ThreadError
end
end
end
workers.map(&:join)
end
end
unless defined? sum
#
# Sum the elements
#
def sum(&block)
if block_given?
map(&block).reduce(:+)
else
reduce(:+)
end
end
end
alias_method :sum_by, :sum
#
# Average the elements
#
def average
count = 0
sum = 0
each { |e| count += 1; sum += e }
sum / count.to_f
end
#
# Lazily enumerate unique elements
# (WARNING: This can cause an infinite loop if you enumerate over a cycle,
# since it will keep reading the input until it finds a unique element)
#
def uniq
already_seen = Set.new
Enumerator::Lazy.new(self) do |yielder, value|
key = block_given? ? yield(value) : value
yielder << value if already_seen.add?(key)
end
end
alias_method :uniq_by, :uniq
#
# The same as "map", except that if an element is an Array or Enumerable, map is called
# recursively on that element. (Hashes are ignored because of the complications of block
# arguments and return values.)
#
# Example:
# [ [1,2], [3,4] ].deep_map{|e| e ** 2 } #=> [ [1,4], [9,16] ]
#
def map_recursively(max_depth=nil, current_depth=0, parent=nil, &block)
return self if max_depth and (current_depth > max_depth)
map do |obj|
if obj == parent # infinite loop scenario!
yield obj
else
case obj
when String, Hash
yield obj
when Enumerable
obj.map_recursively(max_depth, current_depth+1, self, &block)
else
yield obj
end
end
end
end
alias_method :deep_map, :map_recursively
alias_method :recursive_map, :map_recursively
alias_method :map_recursive, :map_recursively
#
# The same as "select", except that if an element is an Array or Enumerable, select is called
# recursively on that element.
#
# Example:
# [ [1,2], [3,4] ].select_recursively{|e| e % 2 == 0 } #=> [ [2], [4] ]
#
def select_recursively(max_depth=nil, current_depth=0, parent=nil, &block)
return self if max_depth and (current_depth > max_depth)
map do |obj|
if obj == parent # infinite loop scenario!
obj if yield obj
else
case obj
when String, Hash
obj if yield obj
when Enumerable
obj.deep_select(max_depth, current_depth+1, self, &block)
else
obj if yield obj
end
end
end.compact
end
alias_method :deep_select, :select_recursively
alias_method :recursive_select, :select_recursively
alias_method :select_recursive, :select_recursively
#
# Identical to "reduce" in ruby1.9 (or foldl in haskell.)
#
# Example:
# array.foldl{|a,b| a + b } == array[1..-1].inject(array[0]){|a,b| a + b }
#
def foldl(methodname=nil, &block)
result = nil
raise "Error: pass a parameter OR a block, not both!" unless !!methodname ^ block_given?
if methodname
each_with_index do |e,i|
if i == 0
result = e
next
end
result = result.send(methodname, e)
end
else
each_with_index do |e,i|
if i == 0
result = e
next
end
result = block.call(result, e)
end
end
result
end
#
# See: Array#permutation
#
def permutation(*args, &block)
to_a.permutation(*args, &block)
end
#
# See: See Array#combination
#
def combination(*args, &block)
to_a.combination(*args, &block)
end
#
# Returns the powerset of the Enumerable
#
# Example:
# [1,2].powerset #=> [[], [1], [2], [1, 2]]
#
def powerset
return to_enum(:powerset) unless block_given?
a = to_a
(0...2**a.size).each do |bitmask|
# the bit pattern of the numbers from 0..2^(elements)-1 can be used to select the elements of the set...
yield a.select.with_index { |e, i| bitmask[i] == 1 }
end
end
#
# Reverse zip (aligns the ends of two arrays, and zips them from right to left)
#
# eg:
# >> [5,39].rzip([:hours, :mins, :secs])
# => [ [:mins, 5], [:secs, 39] ]
#
# Note: Like zip, it will pad the second array if it's shorter than the first
#
def rzip(other)
reverse_each.zip(other.reverse_each).reverse_each
end
#
# Does the opposite of #zip -- converts [ [:a, 1], [:b, 2] ] to [ [:a, :b], [1, 2] ]
#
def unzip
# TODO: make it work for arrays containing uneven-length contents
to_a.transpose
end
#
# Associative grouping; groups all elements who share something in common with each other.
# You supply a block which takes two elements, and have it return true if they are "neighbours"
# (eg: belong in the same group).
#
# Example:
# [1,2,5,6].group_neighbours_by { |a,b| b-a <= 1 } #=> [ [1,2], [5,6] ]
#
# (Note: This is a very fast one-pass algorithm -- therefore, the groups must be pre-sorted.)
#
def group_neighbours_by(&block)
result = []
cluster = [first]
each_cons(2) do |a,b|
if yield(a,b)
cluster << b
else
result << cluster
cluster = [b]
end
end
result << cluster if cluster.any?
result
end
alias_method :group_neighbors_by, :group_neighbours_by
#
# Converts an array of 2-element key/value pairs into a Hash, grouped by key.
# (Like to_h, but the pairs can have duplicate keys.)
#
def grouped_to_h
result = Hash.of_arrays
each {|k,v| result[k] << v }
result
end
alias_method :group_to_h, :grouped_to_h
alias_method :to_h_in_groups, :grouped_to_h
alias_method :to_h_grouped, :grouped_to_h
#
# Convert the array into a stable iterator (Iter) object.
#
def to_iter
Iter.new(to_a)
end
alias_method :iter, :to_iter
#
# Counts how many instances of each object are in the collection,
# returning a hash. (Also optionally takes a block.)
#
# eg: [:a, :b, :c, :c, :c, :c].counts #=> {:a=>1, :b=>1, :c=>4}
#
def counts
h = Hash.of_integers
if block_given?
each { |x| h[yield x] += 1 }
else
each { |x| h[x] += 1 }
end
h
end
alias_method :count_by, :counts
alias_method :group_counts, :counts
#
# group_by the elements themselves
#
def groups
group_by(&:self)
end
alias_method :grouped, :groups
#
# run-length encode the array (returns an array of [count, element] pairs)
#
def rle
return to_enum(:rle) unless block_given?
last = nil
result = []
count = 1
each do |e|
if last
if last != e
yield [count, last]
count = 1
else
count += 1
end
end
last = e
end
yield [count, last]
end
#
# Sort strings by their numerical values
#
def sort_numerically
sort_by do |e|
e = e.path if e.is_a? Path
if e.is_a? String
e.split(/(\d+)/).map { |s| s =~ /^\d+$/ ? s.to_i : s }
else
[e]
end
end
end
#
# Multiplies this Enumerable by something. (Same behaviour as Enumerator#*)
#
def *(other)
case other
when Integer, String
to_enum * other
when Enumerable
to_enum.cross_product(other)
end
end
#
# Multiplies this Enumerable by itself `n` times.
#
def **(n)
[self].cycle(n).reduce(:*)
end
#
# Same behaviour as Enumerator#cross_product
#
def cross_product(other)
to_enum.cross_product(other)
end
alias_method :cross, :cross_product
end
class Enumerator
SPINNER = ['-', '\\', '|', '/']
#
# Display a spinner every `every` elements that pass through the Enumerator.
#
def with_spinner(every=37)
to_enum do |yielder|
spins = 0
each.with_index do |e, i|
if i % every == 0
print "\b" unless spins == 0
print SPINNER[spins % 4]
spins += 1
end
yielder << e
end
print "\b \b" # erase the spinner when done
end
end
#
# Pass in a bunch of indexes to elements in the Enumerator, and this method
# lazily returns them as a new Enumerator.
#
def values_at(*indexes)
return if indexes.empty?
indexes.flatten!
indexes = Set.new(indexes)
Enumerator.new do |yielder|
each_with_index do |e,i|
yielder << e if indexes.delete(i)
break if indexes.empty?
end
end
end
#
# Concatenates two Enumerators, returning a new Enumerator.
#
def +(other)
raise "Can only concatenate Enumerable things to Enumerators" unless Enumerable === other
Enumerator.new(count + other.count) do |yielder|
each { |e| yielder << e }
other.each { |e| yielder << e }
end
end
#
# Multiplies this Enumerator by something else.
#
# Enumerator * Integer == a new Enumerator that repeats the original one <Integer> times
# Enumerator * String == joins the Enumerator's elements into a new string, with <String> in between each pair of elements
# Enumerator * Enumerable == the cross product (aka. cartesian product) of the Enumerator and the Enumerable
#
def *(other)
case other
when Integer
cycle(other)
when String
join(other)
when Enumerable
cross(other)
else
raise "#{other.class} is not something that can be multiplied by an Enumerator"
end
end
#
# Takes the cross product (aka. cartesian product) of the Enumerator and the argument,
# returning a new Enumerator. (The argument must be some kind of Enumerable.)
#
def cross_product(other)
Enumerator.new(count + other.count) do |yielder|
each { |a| other.each { |b| yielder << [a,b] } }
end
end
alias_method :cross, :cross_product
end