# Lecture 03; Feb. 11, 2020        
# The itertools Module  
see: https://nyu-cds.github.io/python-itertools/

---

- The concept of iterators
- Infinite Iterators
- Finite Iterators
- Combinatoric Generators

---

- To create an array (or a list or tuple), we first have to allocate a block of system memory
to store its elements.  

- In order to look up any specific element in our list, we simply need to know which element we want and a pointer where the object starts.






---

The __itertools__ module implements a number of iterator building blocks that provide fast, memory efficient tools.


An iterator behaves like a list of values, with some **important differences**:

- The values are generated on demand (the sequence is not stored in memory)
- The values can only be accessed in sequence (not like an array)
- The values can only be accessed once



An iterator is an object that provides two methods:

- `__iter__`, which returns the iterator itself
- `__next__`, which returns the next value from the iterator



In [1]:
it = iter('advanced_python')

# first 8 elements
print(it.__next__())
print(it.__next__())
print(it.__next__())
print(it.__next__())
print(it.__next__())
print(it.__next__())
print(it.__next__())
print(it.__next__())

# the rest
lst = [e for e in it]
print("\nlst: %s" % lst)

# at this point the iterator is completed
print(it.__next__())

a
d
v
a
n
c
e
d

lst: ['_', 'p', 'y', 't', 'h', 'o', 'n']


StopIteration: 

In [2]:
it = iter('advanced_python')
print(list(it))
    
# At this point the iterator is finished
print(list(it))

['a', 'd', 'v', 'a', 'n', 'c', 'e', 'd', '_', 'p', 'y', 't', 'h', 'o', 'n']
[]


In [3]:
# the built-in enumerate() function adds a counter to an iterable
it = iter('advanced_python')

for e in enumerate(it): 
    print(e)

(0, 'a')
(1, 'd')
(2, 'v')
(3, 'a')
(4, 'n')
(5, 'c')
(6, 'e')
(7, 'd')
(8, '_')
(9, 'p')
(10, 'y')
(11, 't')
(12, 'h')
(13, 'o')
(14, 'n')


In [4]:
# Additionally look at: 
for i in range(1000): 
    print(i)
    # some_work(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

---

### range() vs xrange()

#### In Python2: 

- range(): returns a list  
- xrange(): returns a lazy xrange object that produces its values on demand (similar in spirit to a generator)
    
#### In Python3: 
- there is no xrange, and range returns a lazy range object (values are produced on demand, like Python 2's xrange, rather than stored in a list)

In [5]:
# returns a list

def demo_range(start, stop, step=1):
    """Return a list of the values from ``start`` up to (excluding) ``stop``.

    This eager counterpart of a lazy range builds the complete list in
    memory before returning it.

    Args:
        start: First value of the sequence.
        stop: Exclusive bound; the sequence ends before reaching it.
        step: Increment between consecutive values. A negative step counts
            down from ``start`` towards ``stop``, like the built-in ``range``.

    Returns:
        A list with every generated value, in order (empty when ``start``
        is already at or past ``stop`` in the direction of ``step``).

    Raises:
        ValueError: If ``step`` is zero.
    """
    # A zero step would never reach `stop`: the loop would append forever.
    if step == 0:
        raise ValueError("demo_range() step must not be zero")

    numbers = []
    if step > 0:
        while start < stop:
            numbers.append(start)
            start += step
    else:
        # Counting down: the termination test has to be mirrored, otherwise
        # a negative step with start < stop would loop forever.
        while start > stop:
            numbers.append(start)
            start += step
    return numbers

---

- Note that the range implementation must precreate the list of all numbers within the range.

- We use memory for "all numbers".


In [6]:
# returns a generator

def demo_xrange(start, stop, step=1):
    """Lazily yield the values from ``start`` up to (excluding) ``stop``.

    Unlike a list-building range, values are produced one at a time, so the
    whole sequence is never held in memory.

    Args:
        start: First value of the sequence.
        stop: Exclusive bound; the sequence ends before reaching it.
        step: Increment between consecutive values. A negative step counts
            down from ``start`` towards ``stop``, like the built-in ``range``.

    Yields:
        The next value of the sequence.

    Raises:
        ValueError: If ``step`` is zero. Because this is a generator
            function, the error is raised when iteration starts, not when
            the function is called.
    """
    # A zero step would never reach `stop`: the generator would never finish.
    if step == 0:
        raise ValueError("demo_xrange() step must not be zero")

    if step > 0:
        while start < stop:
            yield start
            start += step
    else:
        # Counting down: mirror the termination test so that a negative
        # step cannot produce an endless stream.
        while start > stop:
            yield start
            start += step
        
# We do not precreate the list of all numbers within the range.
# We do not use memory for "all numbers".


----

- The generator is able to 'return' many values. 

- Every time the code gets to the yield, the function emits its value

- when another value is requested the function resumes running (maintaining its previous state) and emits the new value.

In [7]:
# how many numbers in list_of_numbers are divisible by 3?

list_of_numbers = demo_range(0, 1000)
divisible_by_three = len([n for n in list_of_numbers if n % 3 == 0])

print("divisible_by_three:", divisible_by_three)

divisible_by_three: 334


In [10]:
generator = (1 for n in list_of_numbers if n % 3 == 0)
print("generator:", generator)
divisible_by_three = sum(generator)
print(divisible_by_three)

#Here, we have a generator that emits a value of 1 whenever it encounters a number divisible by 3, 
# and nothing otherwise.

generator: <generator object <genexpr> at 0x1055f3840>
334


In [9]:
# at this point, generator is completed

for e in generator:
    print(e)

sum(generator)

0

## Infinite Iterators
The __itertools__ module comes with three iterators that can iterate infinitely.
- useful for generating numbers or cycling over iterables of unknown length
- infinite iterators need to be stopped

#### itertools.count(start,step)  
https://docs.python.org/3/library/itertools.html#itertools.count

In [13]:
from itertools import count

for i in count(10, 3):
    print(i)
    if i >= 30: break
        

10
13
16
19
22
25
28
31


#### itertools.islice(seq, [start,] stop [, step])   

https://docs.python.org/3/library/itertools.html#itertools.islice

Make an iterator that returns selected elements from the iterable.  


In [14]:
from itertools import islice

print(list(islice(count(10, 3), 5)))     # first 5 elements 
print(list(islice(count(10, 3), 10)))    # first 10 elements 
print(list(islice(count(10, 3), 5, 10))) # second 5 elements: indices 5 through 9 (0-based, inclusive)

[10, 13, 16, 19, 22]
[10, 13, 16, 19, 22, 25, 28, 31, 34, 37]
[25, 28, 31, 34, 37]


In [15]:
# itertools.islice(seq, [start,] stop [,step])

print(list(islice(count(10,3), 10)))       # first 10 elements 
print(list(islice(count(10,3), 3, 10, 2))) # indices 3 through 9 (0-based, inclusive), every second element

[10, 13, 16, 19, 22, 25, 28, 31, 34, 37]
[19, 25, 31, 37]


**None**: "If stop is None, then iteration continues until the iterator is exhausted, if at all; otherwise, it stops at the specified position."

In [16]:
print(list(islice('ABCDEFG', 2, None)))
print(list(islice('ABCDEFG', 100))) # and len('ABCDEFG') = 7

['C', 'D', 'E', 'F', 'G']
['A', 'B', 'C', 'D', 'E', 'F', 'G']


#### itertools.cycle(seq)

https://docs.python.org/2/library/itertools.html#itertools.cycle

In [17]:
from itertools import cycle

print(list(islice(cycle('abc'), 12)))

['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c']


In [18]:
# iterators can be used in different ways
lst = ['advanced','python','for','data','science']
print(list(islice(cycle(lst), 10)))

['advanced', 'python', 'for', 'data', 'science', 'advanced', 'python', 'for', 'data', 'science']


#### itertools.repeat(elem [,times])

https://docs.python.org/2/library/itertools.html#itertools.repeat

In [19]:
from itertools import repeat

# repeat an object: e.g. string

print(list(repeat('abcde', 5)))

['abcde', 'abcde', 'abcde', 'abcde', 'abcde']


In [20]:
# repeat an object: e.g. list
print(list(repeat(['CDS', 'Courant', 'NYU'], 5)))


[['CDS', 'Courant', 'NYU'], ['CDS', 'Courant', 'NYU'], ['CDS', 'Courant', 'NYU'], ['CDS', 'Courant', 'NYU'], ['CDS', 'Courant', 'NYU']]


## Finite Iterators
itertools also has a number of iterators that terminate.

#### itertools.accumulate(seq [, func])

In [21]:
from itertools import accumulate

print(list(accumulate(range(1, 11)))) # 1, 1 + 2, 1 + 2 + 3, 1 + 2 + 3 + 4,...

[1, 3, 6, 10, 15, 21, 28, 36, 45, 55]


In [22]:
import operator

print(list(accumulate(range(1, 11), operator.mul))) # 1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]


In [23]:
# it can also handle non-numeric lists such as strings

print(list(accumulate('cds_nyu')))

print(list(accumulate(repeat('python ', 3))))

['c', 'cd', 'cds', 'cds_', 'cds_n', 'cds_ny', 'cds_nyu']
['python ', 'python python ', 'python python python ']


#### itertools.chain(*iterables)  

In [24]:
my_list = ['foo', 'bar']
cmd = ['ls', '/some/dir']
numbers = list(range(5))
my_list.extend([cmd, numbers])

print(my_list)

['foo', 'bar', ['ls', '/some/dir'], [0, 1, 2, 3, 4]]


In [31]:
list(range(5))

[0, 1, 2, 3, 4]

The chain iterator takes a series of iterables and flattens them down into one long iterable.

In [25]:
from itertools import chain

my_list = list(chain(['foo', 'bar'], cmd, numbers))

print(my_list)

['foo', 'bar', 'ls', '/some/dir', 0, 1, 2, 3, 4]


#### itertools.compress(seq, selectors)
Useful for filtering an iterable using a second boolean iterable (i.e. an indicator)

https://docs.python.org/2/library/itertools.html#itertools.compress


In [26]:
from itertools import compress

letters = 'ABCDEFGHIJKJM'
bools = [True, True, False, True, False, True, True, True, False, True]

# notice the sizes do not need to match
print(list(compress(letters, bools)))

['A', 'B', 'D', 'F', 'G', 'H', 'J']


In [27]:
def either_aeiou(e):
    """Return True when ``e`` equals one of the upper-case vowels A, E, I, O, U."""
    # Candidates are compared in the same order as an explicit `or` chain.
    return any(e == vowel for vowel in ('A', 'E', 'I', 'O', 'U'))

print(list(compress(letters, [either_aeiou(e) for e in letters])))

['A', 'E', 'I']


---

### itertools.dropwhile(predicate, seq)  

- Drop the elements while the predicate is True; afterwards, returns every element.

### itertools.takewhile(predicate, seq)  

- Take the elements while the predicate is True

In [45]:
from itertools import dropwhile, takewhile

print(list(dropwhile(lambda x: x > 5, [7, 8, 9, 10, 1, 2, 3, 10])))

print(list(takewhile(lambda x: x > 5, [6, 7, 8, 9, 10, 1, 2, 3, 20])))

[1, 2, 3, 10]
[6, 7, 8, 9, 10]


In [33]:
lst = ['parrot', 'pelican', 'lion', 'cat', 'panther', 'dolphin', 'dog']

print("lst_take_while:", list(takewhile(lambda word: word[0] == 'p', lst)))

print("lst_drop_while:", list(dropwhile(lambda word: word[0] == 'p', lst)))


lst_take_while: ['parrot', 'pelican']
lst_drop_while: ['lion', 'cat', 'panther', 'dolphin', 'dog']


#### itertools.filterfalse(predicate, seq) 

https://docs.python.org/3/library/itertools.html#itertools.filterfalse

Keep only the elements for which the predicate is False (i.e. drop every element for which it is True).

In [47]:
from itertools import filterfalse

# keep *all* elements for which the predicate is false

print(list(filterfalse(lambda x: x < 5, [6, 7, 8, 9, 10, 1, 2, 3, 20])))

[6, 7, 8, 9, 10, 20]


#### itertools.groupby(seq, key=None)  
https://docs.python.org/2/library/itertools.html#itertools.groupby  
Make an iterator that returns consecutive keys and groups from the iterable.  
The key is a function computing a key value for each element.

In [48]:
from itertools import groupby
 
numbers = range(20)
# to group consecutive ones from range(20) by the same **quotient** when divided by 5; 
# i.e. for x in range(20) group by x // 5

for (key, group) in groupby(numbers, lambda x: x // 5):
    print(key, list(group))

0 [0, 1, 2, 3, 4]
1 [5, 6, 7, 8, 9]
2 [10, 11, 12, 13, 14]
3 [15, 16, 17, 18, 19]


In [49]:
# group the list of elements by the same **remainder**
for (key, group) in groupby(numbers, lambda x: x % 5):
    print(key, list(group))

0 [0]
1 [1]
2 [2]
3 [3]
4 [4]
0 [5]
1 [6]
2 [7]
3 [8]
4 [9]
0 [10]
1 [11]
2 [12]
3 [13]
4 [14]
0 [15]
1 [16]
2 [17]
3 [18]
4 [19]


In [50]:
# how to combine the previous output by the keys?
reminder_number = [(x % 5, x) for x in numbers]
reminder_number = sorted(reminder_number)
print("reminder_number:\n%s\n" % reminder_number)

for (key, group) in groupby(reminder_number, lambda x: x[0]):
    print(key, [n for r, n in group])

reminder_number:
[(0, 0), (0, 5), (0, 10), (0, 15), (1, 1), (1, 6), (1, 11), (1, 16), (2, 2), (2, 7), (2, 12), (2, 17), (3, 3), (3, 8), (3, 13), (3, 18), (4, 4), (4, 9), (4, 14), (4, 19)]

0 [0, 5, 10, 15]
1 [1, 6, 11, 16]
2 [2, 7, 12, 17]
3 [3, 8, 13, 18]
4 [4, 9, 14, 19]


#### itertools.starmap(function, seq)
https://docs.python.org/3/library/itertools.html#itertools.starmap

Iterator that computes the function using arguments obtained from the iterable. 

In [51]:
from itertools import starmap

# here is the iterable: [(1,2), (3,4), (5,6)]

for item in starmap(lambda u, v: u + v, [(1, 2), (3, 4), (5, 6)]):
    print(item)

3
7
11


In [52]:
def add(u, v):
    """Return ``u + v``; used as the two-argument callable for ``starmap``."""
    total = u + v
    return total
 
for item in starmap(add, [(1, 2), (3, 4), (5, 6)]):
    print(item)

3
7
11


#### itertools.tee(seq, n=2)
Creates n iterators from the given sequence

In [56]:
from itertools import tee
data = 'ABCDE'
iters = tee(data, 5)

for i in range(5):
    print('iterator:%d' % i)
    for item in iters[i]:
        print(item, end="")
    print("\n")

iterator:0
ABCDE

iterator:1
ABCDE

iterator:2
ABCDE

iterator:3
ABCDE

iterator:4
ABCDE



#### itertools.zip_longest(*seq, fillvalue=None)  
https://docs.python.org/3/library/itertools.html#itertools.zip_longest  
An iterator that aggregates elements from each of the iterables.  

In [57]:
from itertools import zip_longest

print(list(zip_longest('AB', 'xyzw', fillvalue='_')))

[('A', 'x'), ('B', 'y'), ('_', 'z'), ('_', 'w')]


In [58]:
print(list(zip_longest('AB', 'xyzw', range(5), fillvalue='_')))

[('A', 'x', 0), ('B', 'y', 1), ('_', 'z', 2), ('_', 'w', 3), ('_', '_', 4)]


In [59]:
# useful to create dictionaries

vals = ['pqr', 'uvw', 'xyz']

dc = dict(zip_longest('1234567', vals, fillvalue='blank_value'))
print("dict:", dc)

dict: {'1': 'pqr', '2': 'uvw', '3': 'xyz', '4': 'blank_value', '5': 'blank_value', '6': 'blank_value', '7': 'blank_value'}


## Combinatoric Generators
Iterators that can be used for creating combinations and permutations of data

#### itertools.combinations(seq, r) 
#### itertools.combinations_with_replacement(seq, r)

---

E.g. There is an urn with four balls: yellow, green, red, and blue.  

Q: In how many ways can one pick two (three) balls without replacement?  

Q: In how many ways can one pick two (three) balls with replacement?

In [60]:
from itertools import combinations, combinations_with_replacement

print("combinations without replacement:\n", list(combinations('RBGY', 2))) 
print("\n")
print("combinations with replacement:\n", list(combinations_with_replacement('RBGY', 2)))

combinations without replacement:
 [('R', 'B'), ('R', 'G'), ('R', 'Y'), ('B', 'G'), ('B', 'Y'), ('G', 'Y')]


combinations with replacement:
 [('R', 'R'), ('R', 'B'), ('R', 'G'), ('R', 'Y'), ('B', 'B'), ('B', 'G'), ('B', 'Y'), ('G', 'G'), ('G', 'Y'), ('Y', 'Y')]


In [61]:
for item in combinations('RBGY', 2):
    print(''.join(item), end="; ")

RB; RG; RY; BG; BY; GY; 

In [62]:
for item in combinations_with_replacement('RBGY', 2):
    print(''.join(item), end="; ")

RR; RB; RG; RY; BB; BG; BY; GG; GY; YY; 

#### itertools.permutations(iterable, r=None)

In [63]:
from itertools import permutations

for item in permutations('RBGY', 2): ## 4*3=12
    print(''.join(item), end="; ")

RB; RG; RY; BR; BG; BY; GR; GB; GY; YR; YB; YG; 

#### itertools.product(*seq, repeat=1)
Produces the **cartesian product** of sequences

In [64]:
from itertools import product

arrays = [('A', 'B'), ('a', 'b', 'c')]
cart_prod = list(product(*arrays))
print(cart_prod)

# size (cardinality) is 2 * 3  = 6


[('A', 'a'), ('A', 'b'), ('A', 'c'), ('B', 'a'), ('B', 'b'), ('B', 'c')]


In [72]:
import numpy as np

M = np.zeros((2, 2))
cids = product((0, 1), (0,1))

print("cids: %s\n" % cids)

for i, e  in enumerate(cids):
    M[e] = i * 100
    print("i = %s, e = %s" %(i, e))

print("\nM=\n%s" % M)

cids: <itertools.product object at 0x106a3cca8>

i = 0, e = (0, 0)
i = 1, e = (0, 1)
i = 2, e = (1, 0)
i = 3, e = (1, 1)

M=
[[  0. 100.]
 [200. 300.]]


In [71]:
list(product((0, 1), (0,1)))

[(0, 0), (0, 1), (1, 0), (1, 1)]

In [79]:
for i, e in enumerate(product((0, 1), (0,1))): 
    print(i,e)

0 (0, 0)
1 (0, 1)
2 (1, 0)
3 (1, 1)


In [70]:
M[0]

array([  0., 100.])

In [80]:
import numpy as np

M = np.zeros((3, 3, 3))
cids = product((0,1,2), (0,1,2), (0,1,2))


print("cids: ", cids)

for i, e  in enumerate(cids):
    M[e] = i * 100
    print("i = %2d, e = %s" %(i, e))

print("\nM=\n%s" % M)

cids:  <itertools.product object at 0x1068621b0>
i =  0, e = (0, 0, 0)
i =  1, e = (0, 0, 1)
i =  2, e = (0, 0, 2)
i =  3, e = (0, 1, 0)
i =  4, e = (0, 1, 1)
i =  5, e = (0, 1, 2)
i =  6, e = (0, 2, 0)
i =  7, e = (0, 2, 1)
i =  8, e = (0, 2, 2)
i =  9, e = (1, 0, 0)
i = 10, e = (1, 0, 1)
i = 11, e = (1, 0, 2)
i = 12, e = (1, 1, 0)
i = 13, e = (1, 1, 1)
i = 14, e = (1, 1, 2)
i = 15, e = (1, 2, 0)
i = 16, e = (1, 2, 1)
i = 17, e = (1, 2, 2)
i = 18, e = (2, 0, 0)
i = 19, e = (2, 0, 1)
i = 20, e = (2, 0, 2)
i = 21, e = (2, 1, 0)
i = 22, e = (2, 1, 1)
i = 23, e = (2, 1, 2)
i = 24, e = (2, 2, 0)
i = 25, e = (2, 2, 1)
i = 26, e = (2, 2, 2)

M=
[[[   0.  100.  200.]
  [ 300.  400.  500.]
  [ 600.  700.  800.]]

 [[ 900. 1000. 1100.]
  [1200. 1300. 1400.]
  [1500. 1600. 1700.]]

 [[1800. 1900. 2000.]
  [2100. 2200. 2300.]
  [2400. 2500. 2600.]]]
