# itertools module
- collection of advanced iteration tools
- [itertools](https://docs.python.org/3.6/library/itertools.html)

In [1]:
import itertools

In [2]:
# cartesian product

for j,k in itertools.product(['jack','jill'], 
                       ['hill', 'up', 'water']):
    print(j,k)

jack hill
jack up
jack water
jill hill
jill up
jill water


In [3]:
# get all four bit ints

for p in itertools.product([0,1], repeat=4):
    print(p)

(0, 0, 0, 0)
(0, 0, 0, 1)
(0, 0, 1, 0)
(0, 0, 1, 1)
(0, 1, 0, 0)
(0, 1, 0, 1)
(0, 1, 1, 0)
(0, 1, 1, 1)
(1, 0, 0, 0)
(1, 0, 0, 1)
(1, 0, 1, 0)
(1, 0, 1, 1)
(1, 1, 0, 0)
(1, 1, 0, 1)
(1, 1, 1, 0)
(1, 1, 1, 1)


# easily break out of 'nested loop'

In [4]:
# product goes through the cartesian product
# in linear order (the last iterable varies fastest)

for x,y in itertools.product(range(4), range(10,13)):
    print(x,y)
    if x == 3 and y == 11:
        break

0 10
0 11
0 12
1 10
1 11
1 12
2 10
2 11
2 12
3 10
3 11


# groupby
- something like the Linux 'uniq' command: it only groups consecutive equal items, so sort first to merge all duplicates
- lazy function

In [5]:
lst = [1,2,3,1,1,2,1,3,3,3,7,3]
for k, g in itertools.groupby(sorted(lst)):
    print(k , list(g))


1 [1, 1, 1, 1]
2 [2, 2]
3 [3, 3, 3, 3, 3]
7 [7]


In [6]:
for k, g in itertools.groupby(lst):
    print(k , list(g))


1 [1]
2 [2]
3 [3]
1 [1, 1]
2 [2]
1 [1]
3 [3, 3, 3]
7 [7]
3 [3]


In [7]:
[ (k, len(list(g))) for k, g in itertools.groupby( lst) ] 

[(1, 1), (2, 1), (3, 1), (1, 2), (2, 1), (1, 1), (3, 3), (7, 1), (3, 1)]

# chain
- takes an arbitrary number of args,
- and iterates over each arg, from left to right
- note chain can take any mix of iterable types
- lazy function

In [8]:
def gen():
    """Yield the integers 0 through 4, one at a time."""
    yield from range(5)

[t for t in itertools.chain('foo', gen(), ('b','a','r'))]

['f', 'o', 'o', 0, 1, 2, 3, 4, 'b', 'a', 'r']

In [9]:
# takes one iterable arg, and iterates over each element

list(itertools.chain.from_iterable(('foo', [1,2,3],'bar')))

['f', 'o', 'o', 1, 2, 3, 'b', 'a', 'r']

# compress

In [10]:
# similar to numpy boolean indexing

list(itertools.compress(range(5), [1,0,0,1,0]))

[0, 3]

# zip_longest (padded zip)

In [11]:
# like zip, but pads the shorter iterables with fillvalue instead of stopping at the shortest

list(itertools.zip_longest([1,2,3,4], [1], [4,5], 
                           fillvalue=10))

[(1, 1, 4), (2, 10, 5), (3, 10, 10), (4, 10, 10)]

# combinations 
- iterates over all possible subsets of a given size that can be made from an iterable
- remember that sets are not ordered, so would not see (0,1,2) and (2,1,0) in output
- can make subsets with or without replacement
- lazy function 

In [12]:
for a,b,c in itertools.combinations(range(4), 3):
    print(a,b,c)

0 1 2
0 1 3
0 2 3
1 2 3


In [13]:
# generator of iterables: one combinations object per subset size r

x = [1, 2, 3]

g=(itertools.combinations(x, r) for r in range(len(x)+1))

next(g)

<itertools.combinations at 0x1077404d0>

In [14]:
list((itertools.combinations(x, r) for r in range(len(x)+1)))

[<itertools.combinations at 0x107740d10>,
 <itertools.combinations at 0x107740e30>,
 <itertools.combinations at 0x107740e90>,
 <itertools.combinations at 0x107740ef0>]

In [15]:
# laziness gets out of control sometimes!
# power sets

list(map(list, (itertools.combinations(x, r)\
                for r in range(len(x)+1))))

[[()], [(1,), (2,), (3,)], [(1, 2), (1, 3), (2, 3)], [(1, 2, 3)]]

In [16]:
[list(itertools.combinations(x, r))\
                for r in range(len(x)+1)]

[[()], [(1,), (2,), (3,)], [(1, 2), (1, 3), (2, 3)], [(1, 2, 3)]]

In [17]:
# power sets again
# maybe a little nicer

# Flatten the per-size combinations into a single list of subsets of x,
# ordered by subset size. No backslashes are needed inside parentheses.
list(
    itertools.chain.from_iterable(
        itertools.combinations(x, r) for r in range(len(x) + 1)
    )
)

[(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]

In [18]:
# no replacements

list(itertools.combinations(range(3), 3))

[(0, 1, 2)]

In [19]:
list(itertools.combinations_with_replacement(range(3), 3))

[(0, 0, 0),
 (0, 0, 1),
 (0, 0, 2),
 (0, 1, 1),
 (0, 1, 2),
 (0, 2, 2),
 (1, 1, 1),
 (1, 1, 2),
 (1, 2, 2),
 (2, 2, 2)]

# permutations
- order DOES matter
- lazy function

In [20]:
list(itertools.permutations(range(3)))

[(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]

In [21]:
list(itertools.permutations(range(3), 2))

[(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

In [22]:
# repeat generates infinite sequence of one value

g = itertools.repeat(2)
for e in range(4):
    print(next(g))

2
2
2
2


In [23]:
# repeats indefinitely

c = itertools.cycle('larry')

[ next(c) for j in range(13) ]

['l', 'a', 'r', 'r', 'y', 'l', 'a', 'r', 'r', 'y', 'l', 'a', 'r']

In [24]:
# can use repeat with zip, 
# because zip terminates when one sequence terminates

[b**e for b,e in zip(g, range(4))]

[1, 2, 4, 8]

In [25]:
# count produces an infinite sequence
# count is lazy

for j,c in enumerate(itertools.count(start=3, step=5)):
    if j > 10:
        break
    print(j, c)


0 3
1 8
2 13
3 18
4 23
5 28
6 33
7 38
8 43
9 48
10 53


# 'slices' of generators

In [26]:
# takewhile takes elements from the beginning of
# a sequence until the predicate fails

g = itertools.takewhile(lambda x: x < 30, 
                        itertools.count(start=3, step=5))
list(g)

[3, 8, 13, 18, 23, 28]

In [27]:
# dropwhile drops items at the beginning of
# a sequence for as long as the predicate holds

# Discard the leading values below 10 from the infinite count, then pull
# exactly ten items; g stays lazy and can still be advanced afterwards.
g = itertools.dropwhile(
    lambda x: x < 10,
    itertools.count(start=3, step=5),
)
list(itertools.islice(g, 10))

[13, 18, 23, 28, 33, 38, 43, 48, 53, 58]

In [28]:
# since count is infinite, g is infinite

next(g)

63

In [29]:
# lets you take a slice of a generator

list(itertools.islice(itertools.count(start=100),
                      4, 10, 2 ))

[104, 106, 108]

In [30]:
# running total
# lazy

list(itertools.accumulate([1,4,7,4,3,1,2,9]))

[1, 5, 12, 16, 19, 20, 22, 31]

# Also some useful routines in collections module
- [doc](https://docs.python.org/3.7/library/collections.html)

In [31]:
from collections import *

import random

# Counter

In [32]:
# Tally 100 random draws from the inclusive range 10..30.
c = Counter(random.randint(10, 30) for _ in range(100))
    
c

Counter({27: 7,
         14: 5,
         24: 7,
         22: 3,
         10: 10,
         25: 5,
         17: 6,
         15: 10,
         12: 5,
         28: 2,
         26: 5,
         23: 5,
         11: 6,
         21: 3,
         13: 4,
         29: 5,
         20: 3,
         18: 5,
         30: 1,
         16: 2,
         19: 1})

In [33]:
c + c

Counter({27: 14,
         14: 10,
         24: 14,
         22: 6,
         10: 20,
         25: 10,
         17: 12,
         15: 20,
         12: 10,
         28: 4,
         26: 10,
         23: 10,
         11: 12,
         21: 6,
         13: 8,
         29: 10,
         20: 6,
         18: 10,
         30: 2,
         16: 4,
         19: 2})

# deque
- fast appends and pops on both sides


In [34]:
d = deque()
d

deque([])

In [35]:
d.append(333)

In [36]:
d.appendleft(46)
d

deque([46, 333])

In [37]:
d.extend([2,3,4,5])
d

deque([46, 333, 2, 3, 4, 5])

In [38]:
d.pop()
d

deque([46, 333, 2, 3, 4])

In [39]:
d.popleft()

46

# defaultdict


In [40]:
d = {}

In [41]:
d[3]

KeyError: 3

In [42]:
dd = defaultdict(int)
dd

defaultdict(int, {})

In [43]:
dd[0]

0

In [44]:
dd

defaultdict(int, {0: 0})

In [45]:
dd = defaultdict(list)
dd

defaultdict(list, {})

In [46]:
dd[0]

[]

In [47]:
dd

defaultdict(list, {0: []})