### Sentence

In [1]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence as a sequence of words, extracted eagerly from the text.

    There is no ``__iter__`` here: iteration works through the sequence
    protocol, i.e. ``iter()`` falls back to calling ``__getitem__`` with
    0, 1, 2, ... until ``IndexError`` is raised.
    """

    def __init__(self, text):
        self.text = text
        # Every maximal run of word characters counts as one word.
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        shortened = reprlib.repr(self.text)
        return f'Sentence({shortened})'

    def __len__(self):
        word_count = len(self.words)
        return word_count

    def __getitem__(self, index):
        # Delegates to the underlying list, so ints and slices both work.
        return self.words[index]

### iterator
An iterator is any object that implements the no-argument `__next__` method, which returns the next item in a series or raises `StopIteration` when there are no more items. Python iterators also implement the `__iter__` method, so they are iterable as well.


In [2]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that is iterable over its words via a generator method."""

    def __init__(self, text):
        self.text = text
        # The full word list is still built up front.
        self.words = RE_WORD.findall(text)

    def __iter__(self):
        # Generator function: every call returns a fresh generator over the words.
        yield from self.words

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        shortened = reprlib.repr(self.text)
        return f'Sentence({shortened})'

### lazy sentence

In [3]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that finds its words lazily, only while it is being iterated."""

    def __init__(self, text):
        # No word list is built here; only the raw text is kept.
        self.text = text

    def __iter__(self):
        # finditer yields match objects one at a time; the generator
        # expression turns each match into the matched word on demand.
        matches = RE_WORD.finditer(self.text)
        return (found.group() for found in matches)

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        shortened = reprlib.repr(self.text)
        return f'Sentence({shortened})'

In [4]:
s = Sentence('Hallo, und Guten Tag!')
list(s)

['Hallo', 'und', 'Guten', 'Tag']

### ArithmeticProgression

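All three versions produce more or less the same result. (With floating-point steps, the `itertools.count` version adds `step` repeatedly instead of computing `begin + step * index`, so its values can differ slightly from the other two.) Note that both functions are named `aritprog_gen`, so the second definition replaces the first when the cell runs.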

In [5]:
import itertools

class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    Iteration stops before the first value that is not less than ``end``;
    with ``end=None`` it never stops.
    """

    def __init__(self, begin, step, end=None):
        self.begin, self.step = begin, step
        self.end = end  # None -> "infinite" series

    def __iter__(self):
        # The first value is coerced to whatever type ``begin + step`` has,
        # so e.g. an int ``begin`` with a float ``step`` starts as a float.
        coerce = type(self.begin + self.step)
        current = coerce(self.begin)
        unbounded = self.end is None
        for index in itertools.count(1):
            if not unbounded and not (current < self.end):
                return
            yield current
            # Each value is computed from begin and the index, not by adding
            # step to the previous value.
            current = self.begin + self.step * index
            
def aritprog_gen(begin, step, end=None):
    """Generate begin, begin + step, begin + 2*step, ... while the value is < end.

    With ``end=None`` the generator never stops. The first value is coerced
    to the type of ``begin + step``.
    """
    value = type(begin + step)(begin)
    bounded = end is not None
    index = 0
    while True:
        if bounded and not (value < end):
            break
        yield value
        index += 1
        # Computed from begin and the index, not accumulated from the previous value.
        value = begin + step * index

def aritprog_gen(begin, step, end=None):
    """Return an iterator over begin, begin + step, ... built from itertools.

    With ``end=None`` the endless ``itertools.count`` is returned as is;
    otherwise it is wrapped so that it stops before the first value that is
    not less than ``end``.
    """
    start = type(begin + step)(begin)
    endless = itertools.count(start, step)
    if end is None:
        return endless

    def below_end(n):
        return n < end

    return itertools.takewhile(below_end, endless)

## stdlib generators
### filtering generator functions

In [6]:
import os
from os.path import join, getsize
# Raw string: '\P' and '\D' are not valid escape sequences, so a plain
# string literal here triggers a SyntaxWarning on Python 3.12+ (and is
# slated to become an error). The raw literal has exactly the same value.
DOC_ROOT = r'D:\Programming\Documentation'

# os.walk is itself a generator: it yields one (root, dirs, files) triple per
# directory. The sum() below consumes a generator expression over the files.
for root, _dirs, files in os.walk(DOC_ROOT):
    total_size = sum(getsize(join(root, name)) for name in files)
    print(root, "consumes", total_size, "bytes in", len(files), "non-directory files")

D:\Programming\Documentation consumes 0 bytes in 0 non-directory files
D:\Programming\Documentation\BDA consumes 0 bytes in 0 non-directory files
D:\Programming\Documentation\Big Data consumes 1585371 bytes in 19 non-directory files
D:\Programming\Documentation\Big Data\Praktikumsberichte consumes 803008 bytes in 5 non-directory files
D:\Programming\Documentation\cloud native application engineering consumes 882662 bytes in 3 non-directory files
D:\Programming\Documentation\data mining consumes 302501 bytes in 6 non-directory files
D:\Programming\Documentation\machine learning wiki consumes 0 bytes in 0 non-directory files
D:\Programming\Documentation\machine learning wiki\pictures consumes 162658 bytes in 7 non-directory files
D:\Programming\Documentation\nlp consumes 38952 bytes in 2 non-directory files
D:\Programming\Documentation\textundwebmining consumes 98594 bytes in 3 non-directory files


In [7]:
def vowel(c):
    """Return True if ``c`` is a single vowel (a, e, i, o, u), ignoring case.

    The length check matters because ``in`` on strings is a substring test:
    without it, the empty string and multi-character runs such as 'ae' would
    also count as vowels.
    """
    return len(c) == 1 and c.lower() in 'aeiou'

list(filter(vowel, 'Aardvark'))

['A', 'a', 'a']

In [8]:
list(itertools.filterfalse(vowel, 'Aardvark'))

['r', 'd', 'v', 'r', 'k']

In [9]:
list(itertools.dropwhile(vowel, 'Aardvark'))

['r', 'd', 'v', 'a', 'r', 'k']

In [10]:
list(itertools.takewhile(vowel, 'Aardvark'))

['A', 'a']

In [11]:
list(itertools.compress('Aardvark', (1,0,1,1,0,1)))

['A', 'r', 'd', 'a']

In [12]:
list(itertools.islice('Aardvark', 4))

['A', 'a', 'r', 'd']

In [13]:
list(itertools.islice('Aardvark', 4, 7))

['v', 'a', 'r']

In [14]:
list(itertools.islice('Aardvark', 1, 7, 2))

['a', 'd', 'a']

### accumulate

In [15]:
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
list(itertools.accumulate(sample))

[5, 9, 11, 19, 26, 32, 35, 35, 44, 45]

In [16]:
list(itertools.accumulate(sample, min))

[5, 4, 2, 2, 2, 2, 2, 0, 0, 0]

In [17]:
list(itertools.accumulate(sample, max))

[5, 5, 5, 8, 8, 8, 8, 8, 9, 9]

In [18]:
import operator
list(itertools.accumulate(sample, operator.mul))

[5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]

In [19]:
list(itertools.accumulate(range(1, 11), operator.mul))

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

### mapping iterators

In [20]:
list(enumerate('albatroz', 1))

[(1, 'a'),
 (2, 'l'),
 (3, 'b'),
 (4, 'a'),
 (5, 't'),
 (6, 'r'),
 (7, 'o'),
 (8, 'z')]

In [21]:
list(map(operator.mul, range(11), range(11)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [22]:
list(map(operator.mul, range(11), [2, 4, 8]))

[0, 4, 16]

In [23]:
list(map(lambda a, b: (a, b), range(11), [2, 4, 8]))

[(0, 2), (1, 4), (2, 8)]

In [24]:
list(itertools.starmap(operator.mul, enumerate('albatroz', 1)))

['a', 'll', 'bbb', 'aaaa', 'ttttt', 'rrrrrr', 'ooooooo', 'zzzzzzzz']

In [25]:
#running average
list(itertools.starmap(lambda a, b: b/a, 
                       enumerate(itertools.accumulate(sample), 1)))

[5.0,
 4.5,
 3.6666666666666665,
 4.75,
 5.2,
 5.333333333333333,
 5.0,
 4.375,
 4.888888888888889,
 4.5]

### merging generators

In [26]:
list(itertools.chain('ABC', range(2)))

['A', 'B', 'C', 0, 1]

In [27]:
list(itertools.chain(enumerate('ABC')))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [28]:
list(itertools.chain.from_iterable(enumerate('ABC')))

[0, 'A', 1, 'B', 2, 'C']

In [29]:
list(zip('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2)]

In [30]:
list(zip('ABC', range(5), [10, 20, 30, 40]))

[('A', 0, 10), ('B', 1, 20), ('C', 2, 30)]

In [31]:
list(itertools.zip_longest('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2), (None, 3), (None, 4)]

In [32]:
list(itertools.zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

### product

In [33]:
list(itertools.product('ABC', range(2), repeat=1))

[('A', 0), ('A', 1), ('B', 0), ('B', 1), ('C', 0), ('C', 1)]

In [34]:
suits = 'spades hearts diamonds clubs'.split()
list(itertools.product('AK', suits))

[('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamonds'),
 ('A', 'clubs'),
 ('K', 'spades'),
 ('K', 'hearts'),
 ('K', 'diamonds'),
 ('K', 'clubs')]

In [35]:
list(itertools.product('ABC'))

[('A',), ('B',), ('C',)]

In [36]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

In [37]:
list(itertools.product(range(2), repeat=3))

[(0, 0, 0),
 (0, 0, 1),
 (0, 1, 0),
 (0, 1, 1),
 (1, 0, 0),
 (1, 0, 1),
 (1, 1, 0),
 (1, 1, 1)]

In [38]:
rows = itertools.product('AB', range(2), repeat=2)
for row in rows: print(row)

('A', 0, 'A', 0)
('A', 0, 'A', 1)
('A', 0, 'B', 0)
('A', 0, 'B', 1)
('A', 1, 'A', 0)
('A', 1, 'A', 1)
('A', 1, 'B', 0)
('A', 1, 'B', 1)
('B', 0, 'A', 0)
('B', 0, 'A', 1)
('B', 0, 'B', 0)
('B', 0, 'B', 1)
('B', 1, 'A', 0)
('B', 1, 'A', 1)
('B', 1, 'B', 0)
('B', 1, 'B', 1)


### input expanding generators

In [39]:
ct = itertools.count()
next(ct)

0

In [40]:
next(ct), next(ct), next(ct)

(1, 2, 3)

In [41]:
list(itertools.islice(itertools.count(1, .3), 3))

[1, 1.3, 1.6]

In [42]:
cy = itertools.cycle('ABC')
next(cy)

'A'

In [43]:
list(itertools.islice(cy, 7))

['B', 'C', 'A', 'B', 'C', 'A', 'B']

In [44]:
rp = itertools.repeat(7)
next(rp), next(rp)

(7, 7)

In [45]:
list(itertools.repeat(8, 4))

[8, 8, 8, 8]

In [46]:
list(map(operator.mul, range(11), itertools.repeat(5)))  

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

### combinatoric generator functions

In [47]:
list(itertools.combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [48]:
list(itertools.combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [49]:
list(itertools.permutations('ABC', 3))

[('A', 'B', 'C'),
 ('A', 'C', 'B'),
 ('B', 'A', 'C'),
 ('B', 'C', 'A'),
 ('C', 'A', 'B'),
 ('C', 'B', 'A')]

In [50]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

### Rearranging generator functions

In [51]:
list(itertools.groupby('LLLLAAGGG'))

[('L', <itertools._grouper at 0x1aa4f9ea0a0>),
 ('A', <itertools._grouper at 0x1aa4f9eadf0>),
 ('G', <itertools._grouper at 0x1aa4f9eaf70>)]

In [52]:
for char, group in itertools.groupby('LLLLAAAGG'):
    print(char, '->', list(group))

L -> ['L', 'L', 'L', 'L']
A -> ['A', 'A', 'A']
G -> ['G', 'G']


In [53]:
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear',
           'bat', 'dolphin', 'shark', 'lion']
# groupby only groups consecutive items, so sort by the grouping key first
animals.sort(key=len)
animals

['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']

In [54]:
for length, group in itertools.groupby(animals, len):
    print(length, '->', list(group))

3 -> ['rat', 'bat']
4 -> ['duck', 'bear', 'lion']
5 -> ['eagle', 'shark']
7 -> ['giraffe', 'dolphin']


In [55]:
for length, group in itertools.groupby(reversed(animals), len):
    print(length, '->', list(group))

7 -> ['dolphin', 'giraffe']
5 -> ['shark', 'eagle']
4 -> ['lion', 'bear', 'duck']
3 -> ['bat', 'rat']


### tee

In [56]:
list(itertools.tee('ABC'))

[<itertools._tee at 0x1aa4faced80>, <itertools._tee at 0x1aa4faa0880>]

In [57]:
g1, g2 = itertools.tee('ABC')

In [58]:
next(g1)

'A'

In [59]:
next(g2)

'A'

In [60]:
next(g2)

'B'

In [61]:
list(g1)

['B', 'C']

In [62]:
list(g2)

['C']

In [63]:
list(zip(*itertools.tee('ABC')))

[('A', 'A'), ('B', 'B'), ('C', 'C')]

## Subgenerators with yield from

In [64]:
def sub_gen():
    """Subgenerator used for delegation: yields 1, then 2."""
    for value in (1, 2):
        yield value
    
def gen():
    """Yield 0, then everything ``sub_gen()`` yields, then 3."""
    yield 0
    # Delegate to the subgenerator: its items are passed straight through
    # to whoever is iterating over gen().
    yield from sub_gen()
    yield 3
    
for i in gen():
    print(i)

0
1
2
3


In [65]:
def sub_gen():
    """Subgenerator that yields 1 and 2, then returns the string 'Done!'.

    The return value travels in the ``value`` attribute of the StopIteration
    raised when the generator finishes.
    """
    for value in (1, 2):
        yield value
    return 'Done!'
    
def gen():
    """Yield 0, then everything ``sub_gen()`` yields, then 3.

    Between the delegated items and the final 3, print the value that the
    subgenerator returned.
    """
    yield 0
    # The value of a ``yield from`` expression is whatever the subgenerator
    # returns once it is exhausted.
    result = yield from sub_gen()
    print('<--', result)
    yield 3
    
for i in gen():
    print(i)

0
1
2
<-- Done!
3


In [66]:
def tree(cls, level=0, depth=2):
    """Yield ``(class_name, level)`` pairs for ``cls`` and its subclasses, depth-first.

    Args:
        cls: the class at the root of the (sub)tree.
        level: nesting level reported for ``cls``; recursive calls pass
            ``level + 1`` for each subclass.
        depth: the deepest level to descend to; classes at this level are
            yielded, but their subclasses are not visited.

    Yields:
        Tuples ``(cls.__name__, level)``.
    """
    yield cls.__name__, level
    if level >= depth:
        # Depth limit reached: do not even enumerate the subclasses.
        return
    # Call through ``type`` so that metaclasses work too. Looked up on
    # ``type`` itself, ``__subclasses__`` is an unbound method, so
    # ``cls.__subclasses__()`` raises TypeError when ``cls`` is ``type`` --
    # which made e.g. ``tree(object)`` crash as soon as it reached ``type``.
    for sub_cls in type.__subclasses__(cls):
        yield from tree(sub_cls, level + 1, depth)
    
def display(cls, depth=1):
    """Print the class tree rooted at ``cls``, indented four spaces per level.

    Args:
        cls: the root class to display.
        depth: how many levels below ``cls`` to show. The default of 1 prints
            ``cls`` and its direct subclasses only.
    """
    for cls_name, level in tree(cls, depth=depth):
        indent = ' ' * 4 * level
        print(f'{indent}{cls_name}')

display(BaseException)

BaseException
    Exception
    GeneratorExit
    SystemExit
    KeyboardInterrupt
    CancelledError


### Iterable Reducing Functions

In [67]:
all([1, 2, 3])

True

In [68]:
all([1, 0, 3])

False

In [69]:
all([])

True

In [70]:
any([1, 2, 3])

True

In [71]:
any([1, 0, 3])

True

In [72]:
any([0, 0.0])

False

In [73]:
any([])

False

In [74]:
g = (n for n in [0, 0.0, 7, 8])
any(g)

True

In [75]:
next(g)

8

## A Closer Look at the iter Function

In [76]:
from random import randint
def d6():
    """Roll a six-sided die: return a random integer from 1 to 6, inclusive."""
    lowest_face, highest_face = 1, 6
    return randint(lowest_face, highest_face)

for i in iter(d6, 1):
    print(i)

In [77]:
# The context manager closes the file even if write() raises, which the
# manual open()/close() pair did not guarantee.
with open('mydata.txt', 'w') as f:
    f.write('Now the file has content!')


In [78]:
from functools import partial
with open('mydata.txt') as fp:
    # Read the file in 3-character chunks. The sentinel is '' -- what read()
    # returns at end of file -- so the loop always terminates. With '!' as
    # the sentinel, a '!' that does not land alone in a chunk is skipped over
    # and read() then returns '' forever.
    for chunk in iter(partial(fp.read, 3), ''):
        if chunk == '!':
            # Still stop at a chunk that is exactly '!', without printing it.
            break
        print(chunk)

Now
 th
e f
ile
 ha
s c
ont
ent
