In [21]:
# Ch 3: The Iterator Protocol
seq3 = 'the woods are lovely dark and deep'.split()
iterator = iter(seq3)
# Step the iterator by hand, the way a for loop does internally.
while True:
    try:
        x = iterator.__next__()
    except StopIteration:
        # Exhaustion is signalled by StopIteration; that is the normal exit.
        break
    else:
        print(x)


the
woods
are
lovely
dark
and
deep


In [22]:
# Iterator protocol works on lists
import sys
sequence = ['foo', 'bar']
seq_iter = iter(sequence)
print(seq_iter)
# Pull items by hand until the iterator reports that it is exhausted.
while True:
    try:
        print(seq_iter.__next__())
    except StopIteration:
        # Catch only the exhaustion signal: a bare except would also trap
        # KeyboardInterrupt/SystemExit and hide genuine bugs.
        print(f'Inside exception: {sys.exc_info()}')
        break


<list_iterator object at 0x00000198C93A3730>
foo
bar
Inside exception: (<class 'StopIteration'>, StopIteration(), <traceback object at 0x00000198C9795E00>)


In [23]:
# Also works on strings
seq2 = "fubar"
seq2_iter = iter(seq2)
print(seq2_iter)
# Strings follow the same protocol: one character per __next__() call.
while True:
    try:
        print(seq2_iter.__next__())
    except StopIteration:
        # Only exhaustion is expected here; anything else should propagate
        # instead of being swallowed by a bare except.
        print('Inside exception')
        break


<str_ascii_iterator object at 0x00000198C93A3850>
f
u
b
a
r
Inside exception


In [24]:
# Ch 4 : Iterator vs iterable
# An object that allows iteration is called an iterable. This object is required to have an __iter__ method that returns
# an iterator.
# An iterator is required to have both an __iter__ method and a __next__ method.
# An iterator is usually a new object as compared to the corresponding iterable. However, in case of self-iterators, the
# iterable and iterator are the same object.
# "range" (and "xrange" in Python 2) is an iterable but not an iterator. It has an __iter__ method but no __next__ method.
# Another difference - Iterators only iterate once, while iterables can do so repeatedly.
# Check which iterator-protocol methods the range type itself exposes.
range_attrs = dir(range)
for method in ('__iter__', '__next__'):
    found = method in range_attrs
    print('Method {} in range? {}'.format(method, found))

# iter() on a range hands back a separate range_iterator object, which carries both __iter__ and __next__.
print('=' * 40)
r_iter = iter(range(3))
print(r_iter)
r_iter_attrs = dir(r_iter)
for method in ('__iter__', '__next__'):
    found = method in r_iter_attrs
    print('Method {} in r_iter? {}'.format(method, found))


Method __iter__ in range? True
Method __next__ in range? False
<range_iterator object at 0x00000198C9307510>
Method __iter__ in r_iter? True
Method __next__ in r_iter? True


In [25]:
# 4.2 Creating self iterator objects
line = '=' * 10
class Counter:
    """Self-iterating counter that hands out 1..size exactly once.

    The instance is its own iterator: ``__iter__`` returns ``self`` and the
    progress is stored on the instance, so a second pass produces nothing.
    """

    def __init__(self, size):
        # start holds the last value handed out; 0 means nothing produced yet.
        self.start = 0
        self.size = size

    def __iter__(self):
        # Announce every iter() call so the demos can show when it happens.
        print('Called __iter__', self.size)
        return self

    def __next__(self):
        if not self.start < self.size:
            raise StopIteration
        self.start += 1
        return self.start

c = Counter(3)
for num in c:
    print(num)
print(line)
# Counter hands itself back from __iter__, which makes it a self-iterator. Built-in containers
# such as lists and tuples instead build a separate iterator object on every iter() call.
same_object = c == iter(c)
print(same_object)
print(line)
c_id, c_iter_id = id(c), id(iter(c))
print(c_id, c_iter_id)
print(line)
items = [1, 2, 3]
list_equals_its_iterator = items == iter(items)
print(list_equals_its_iterator)
print(id(items), id(iter(items)))
print(line)


Called __iter__ 3
1
2
3
Called __iter__ 3
True
Called __iter__ 3
1755726678416 1755726678416
False
1755722819712 1755722687504


In [26]:
# 4.3 Iterator exhaustion
# An iterator is meant to be used once. When it is done - or exhausted - it raises StopIteration.
# In the example below, c3 is exhausted when called the first time!
c2, c3 = Counter(2), Counter(3)
for x in c2:
    print(f'x = {x}')
    # c3 is a self-iterator: it is drained during the first outer pass and stays empty afterwards.
    for y in c3:
        print(f'\ty = {y}')

# One way around this is to construct the iterators inside the for statements, so that
# a brand-new iterator is produced every time a loop header is reached.
print('=' * 10)
for x in Counter(2):
    for y in Counter(3):
        print(x, y)

# Important difference - iterators only iterate once, while iterables can do so repeatedly.


Called __iter__ 2
x = 1
Called __iter__ 3
	y = 1
	y = 2
	y = 3
x = 2
Called __iter__ 3
Called __iter__ 2
Called __iter__ 3
1 1
1 2
1 3
Called __iter__ 3
2 1
2 2
2 3


In [27]:
# 4.4. Creating an iterable
# To create a class, Counter2, that is iterable but not an iterator it would need to define an __iter__ method but
# not a __next__ method. Inside __iter__ method, Counter2 needs to return an iterator - luckily the original Counter
# serves the purpose fine. The Counter class will be renamed to CounterIterator.
CounterIterator = Counter
class Counter2:
    """Iterable (but not iterator) counter over 1..size.

    Defines only ``__iter__``; each call builds a fresh CounterIterator, so
    the same Counter2 can be looped over repeatedly or in nested loops.
    """

    def __init__(self, size):
        self.size = size

    def __iter__(self):
        fresh_iterator = CounterIterator(self.size)
        return fresh_iterator

c = Counter2(3)
for num in c:
    print(num)
print('=' * 10)

# Unlike Counter, Counter2 works in nested loops: the inner loop gets a new iterator on every pass.
rows, cols = Counter2(2), Counter2(3)
for r in rows:
    for c in cols:
        print(r, c)
print('=' * 10)

# Tip: self-iterators exhaust. When that is a problem, make the object iterable only, not an
# iterator itself. A quick exhaustion test is to iterate over the same object twice.
c2 = Counter2(2)  # not a self-iterator: both passes produce the full list
first_pass = list(c2)
print(first_pass)
second_pass = list(c2)
print(second_pass)
print('=' * 10)
c1 = Counter(2)  # self-iterator: the second pass comes back empty
first_pass = list(c1)
print(first_pass)
second_pass = list(c1)
print(second_pass)
print('=' * 10)


1
2
3
1 1
1 2
1 3
2 1
2 2
2 3
[1, 2]
[1, 2]
Called __iter__ 2
[1, 2]
Called __iter__ 2
[]


In [28]:
# 4.5 Iterators can be infinite.
# Why use an iterator when a function (or method) that returns a list could also suffice? A list producing function
# has one potentially big drawback - it has to create all the data in the result a priori. There is potential for
# running out of memory if a large list needs to be created.
# An iterator, generating results as it goes, might incur a bit of overhead due to repeated __next__ function calls,
# but it will not gobble memory. In fact, an iterator can create an infinite series, which is impossible for a list
# producing function. An example of an infinite counter is below.
class InfiniteCounter:
    """Self-iterator that counts upward from 1 and never raises StopIteration."""

    def __init__(self):
        # cur is the most recently returned value; 0 means nothing returned yet.
        self.cur = 0

    def __iter__(self):
        return self

    def __next__(self):
        following = self.cur + 1
        self.cur = following
        return following

# The consumer decides when to stop; the counter itself would go on forever.
for num in InfiniteCounter():
    if num <= 5:
        print(num)
    else:
        break


1
2
3
4
5


In [29]:
# Ch 5 : Generators
# Generators allow an individual function to both store state and generate each item for the sequence on demand.
# Python documentation on generator: "A function which returns an iterator... Each yield temporarily suspends processing, remembering
# the location execution state, including local variables and pending try-statements. When the generator resumes, it picks up where
# it left off, in contrast to functions which start fresh on every invocation."
# The "yield" keyword, when used within a function or method, tells the Python interpreter to create an iterator from the function.
# Differences between generators and normal functions.
# 1) Generators are not executed when they are invoked, only when they are iterated over.
# 2) Generators can be iterated over, normal functions cannot.
# 3) Generators freeze/suspend their state after a yield statement and wait until their next call. Functions target the contents
#    of their block for garbage collection after they are executed.

def simple_generator():
    """Yield 1 then 2, printing 'generate' once the body starts running."""
    print('generate')
    for value in (1, 2):
        yield value

# Calling the function only builds a generator object; the body has not run yet.
unstarted = simple_generator()
print(unstarted)
for x in simple_generator():
    print(x)
print('=' * 10)

# Generator for the Counter class described earlier
def counter_gen(size):
    """Lazily yield 1, 2, ..., size; yields nothing when size is below 1."""
    cur = 1
    while True:
        if not cur <= size:
            return
        yield cur
        cur += 1

for num in counter_gen(5):
    print(num)
print('='*10)

# 5.1 - Function generators exhaust, and they do not serve well for re-use or in matrix creation
c1 = counter_gen(2)
c2 = counter_gen(3)
for x in c1:
    # c2 is drained during the first outer pass, so only the x == 1 row is printed.
    for y in c2:
        print(x, y)
print('='*10)

# 5.2 - Generators return a generator object. This object has both an __iter__ method and a __next__ method.
gen = counter_gen(3)
print(gen)
for method in ('__iter__', '__next__'):
    print(f'{method} in dir(gen)? {method in dir(gen)}')

# Invoking __iter__ on a generator will return the same generator object instance. Generators are self-iterators.
# Asserted with "is" so the identity claim is actually checked instead of being computed and thrown away.
assert iter(gen) is gen.__iter__() is gen
print('='*10)

# 5.3 Generators can be infinite
def gen_forever():
    """Yield 1, 2, 3, ... without end; the caller must stop consuming."""
    i = 0
    while True:
        i += 1
        yield i

# Stop consuming after the first three values; the generator never ends by itself.
for num in gen_forever():
    if num <= 3:
        print(num)
        continue
    break
print('=' * 10)

# 5.4 "return" stops generation
# A "return" statement in a generator will cause the loop to exit. The following generator will never iterate to 3.
def gen_with_return():
    """Yield 1 and 2, then finish early via a bare return."""
    for value in (1, 2):
        yield value
    # A bare return ends the generator here.
    return
    yield 3  # never reached; kept to show that nothing after return is produced

# Only 1 and 2 are printed; the value after the return is never generated.
early_stopper = gen_with_return()
for num in early_stopper:
    print(num)


<generator object simple_generator at 0x00000198C93C5180>
generate
1
2
1
2
3
4
5
1 1
1 2
1 3
<generator object counter_gen at 0x00000198C9790040>
__iter__ in dir(gen)? True
__next__ in dir(gen)? True
1
2
3
1
2


In [30]:
# Ch. 6 - Object Generators
# Not only can functions generate, but methods can as well. There are two common ways to use generators with objects.
# 1) By returning a generator from the __iter__ method, an object can be iterable. This allows an object instance to be iterated over.
# 2) Because generators are iterators, any method that is a generator can be iterated over. This allows iteration over a method.
class Counter3:
    """Iterable counter whose ``__iter__`` is itself a generator over 1..size."""

    def __init__(self, size):
        self.size = size

    def __iter__(self):
        # Progress lives in a local variable, so every iter() call starts over at 1.
        cur = 1
        while True:
            if not cur <= self.size:
                return
            yield cur
            cur += 1

for x in Counter3(3):
    print(x)
print('=' * 10)

# An object whose __iter__ is a generator can be reused, provided it keeps no iteration state on the instance.
c1 = Counter3(3)
for x in c1:
    print(x)
for x in c1:
    print(x)
print('=' * 10)

# The nested-loop matrix problem seen with self-iterators does not occur with this object.
c2, c3 = Counter3(2), Counter3(3)
for x in c2:
    for y in c3:
        print(x, y)
print('=' * 10)

# Function generators behaved differently: they were created once, outside the loops, and they
# exhaust, so they could not be used in this nested situation.
# A function generator is a self-iterator, whereas a Counter3 builds a new generator every time
# it is iterated over, because the looping machinery calls __iter__ each time a loop starts.

# Each new pass over c3 calls __iter__ again and gets a fresh generator, so there is
# no need to worry about exhaustion here.
iter(c2) == c2


1
2
3
1
2
3
1
2
3
1 1
1 2
1 3
2 1
2 2
2 3


False

In [31]:
# However, if the state of the generator is stored as attributes on the object instance, then problems arise.
class Counter4:
    """Counter whose generator progress is stored on the instance.

    Because ``cur`` lives on ``self`` and is never reset, the instance can be
    traversed only once; later iterations yield nothing.
    """

    def __init__(self, size):
        self.size = size
        self.cur = 1  # shared by every __iter__ call, hence single-use

    def __iter__(self):
        while True:
            if not self.cur <= self.size:
                return
            yield self.cur
            self.cur += 1

# Only the first row is printed: c3 keeps its position on the instance and stays exhausted.
c2, c3 = Counter4(2), Counter4(3)
for x in c2:
    for y in c3:
        print(x, y)


1 1
1 2
1 3


In [32]:
# In addition to implementing __iter__, any method that is a generator may be iterated over.
class Counter5:
    """Counter exposing its sequence through a generator method, not ``__iter__``."""

    def __init__(self, size):
        self.size = size

    def count(self):
        """Return a new generator over 1..size on every call."""
        cur = 1
        while True:
            if not cur <= self.size:
                return
            yield cur
            cur += 1

# Generator methods perform similarly to generator functions. They are self-iterators.
c5 = Counter5(2)
c5_gen = c5.count()
# Asserted with "is" so the self-iterator claim is checked rather than computed and discarded.
assert iter(c5_gen) is c5_gen

# As such, generator methods behave slightly differently than __iter__ generators, and cannot
# simply serve as a drop-in replacement.
c2 = Counter5(2).count()
c3 = Counter5(3).count()
for x in c2:
    # c3 is a single generator object; it is drained during the first outer pass.
    for y in c3:
        print(x, y)
print('='*10)

# For the matrix creation example to work, count needs to be invoked inside of the for loop.
c2 = Counter5(2)
c3 = Counter5(3)
for x in c2.count():
    for y in c3.count():
        print(x, y)


1 1
1 2
1 3
1 1
1 2
1 3
2 1
2 2
2 3


In [33]:
# Ch. 7 - Generators in practice
# When is it appropriate to use a generator? A generator can replace any function that returns a list.
def counter_list(size):
    """Return the list [1, 2, ..., size], built eagerly before returning."""
    collected = []
    upcoming = 1
    while True:
        if not upcoming <= size:
            break
        collected.append(upcoming)
        upcoming += 1
    return collected
def counter_gen(size):
    """Generator counterpart of counter_list: yield 1..size one value at a time."""
    upcoming = 1
    while True:
        if not upcoming <= size:
            return
        yield upcoming
        upcoming += 1
# Asserted so the equivalence claim is checked instead of being computed and thrown away.
assert list(counter_gen(100)) == counter_list(100)

# 7.1 Generators exhaust
# Unlike lists, range, xrange, and non-self iterators, generator functions do not play well with reuse.
# Once they are used, they usually cannot be reused.
five = counter_gen(5)
print(list(five))   # the first pass consumes the generator
print(list(five))   # the second pass finds it already exhausted
print('='*10)
five_r = range(1,6)
print(list(five_r))
print(list(five_r))
print('='*10)

# 7.2 Chaining generators
# Another common use of a generator is to act as a filter on sequences. A generator can be useful for
# a reusable chunk of code to perform filtering or manipulation of sequences. Once armed with a few
# generators, they can be chained together to apply their logic to a sequence.
def positive(seq):
    """Yield the items of *seq* that are >= 0 (zero included), in order."""
    yield from (x for x in seq if x >= 0)
def every_other(gen):
    """Yield every second item of *gen*, starting with the first.

    Args:
        gen: Any iterable. It is wrapped with iter(), so plain sequences
            such as lists work as well as generators and other iterators.

    Yields:
        The items at positions 0, 2, 4, ... of the input.
    """
    iterator = iter(gen)
    for x in iterator:
        yield x
        # Discard the following item. The None default covers running out of
        # items at this point, so no exception needs to be caught.
        next(iterator, None)
def double(seq):
    """Yield each item of *seq* twice in a row."""
    for x in seq:
        yield from (x, x)
# Build the pipeline one stage at a time; nothing runs until the final stage is consumed.
seq = range(-5, 5)
pos = positive(seq)
skip = every_other(pos)
two = double(skip)
print(list(two))
print('=' * 10)

# The previous example could be written in a couple of different ways.
# 1) A single generator could perform all the manipulation logic at once, but the code
#    is harder to understand than the separate functions.
def do_foo(seq):
    """Yield every other non-negative item of *seq*, each one twice.

    Single-function version of the positive -> every_other -> double chain.
    """
    take = True  # toggles on each non-negative item; True means "emit this one"
    for x in seq:
        if x >= 0:
            if take:
                yield from (x, x)
            take = not take
combined_result = list(do_foo(range(-5, 5)))
print(combined_result)
print('=' * 10)

# 2) If chaining these filters together is common, it is cleaner to create the unique generators, positive,
#    every_other, and double, then define do_foo as follows.
def do_foo(seq):
    """Apply positive, every_other and double to *seq*, one stage at a time."""
    non_negative = positive(seq)
    alternating = every_other(non_negative)
    doubled = double(alternating)
    yield from doubled
staged_result = list(do_foo(range(-5, 5)))
print(staged_result)
print('=' * 10)

# 3) An alternative functional spelling of do_foo nests the generators
def do_foo(seq):
    """Apply double(every_other(positive(seq))) as a single nested expression."""
    yield from double(every_other(positive(seq)))
nested_result = list(do_foo(range(-5, 5)))
print(nested_result)
print('=' * 10)


[1, 2, 3, 4, 5]
[]
[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]
[0, 0, 2, 2, 4, 4]
[0, 0, 2, 2, 4, 4]
[0, 0, 2, 2, 4, 4]
[0, 0, 2, 2, 4, 4]


In [34]:
# 7.3 Due to the lazy nature of sequence generation, generators can be tricky to debug.
# 7.4 Generators do not index or slice. Lists do.
pos = positive(range(-5, 5))
print('Trying to fetch pos[1], where pos is a generator')
try:
    print(pos[1])
except TypeError:
    # Subscripting a generator raises TypeError; catch only that so other failures stay visible.
    print(f'In exception: {sys.exc_info()}')
print('='*10)

# The itertools.islice() function replicates a portion of the slicing functionality.
# Rather than returning a list, islice returns an iterable object.
from itertools import islice
seq = islice(pos, 1, 3)
print(f'seq = {seq}')
print(seq.__next__())
print(seq.__next__())
try:
    seq.__next__()
except StopIteration:
    # The slice holds two items, so the third request signals exhaustion.
    print(f'In exception: {sys.exc_info()}')

# Another note: Negative slicing is not available with generators nor with islice. Again, because of possible
# infinite lengths of generated items, there is no mechanism for starting at the end and counting backwards.

# 7.5 Generators have no inherent length. The only way to count the items in a generator is to iterate over them.


Trying to fetch pos[1], where pos is a generator
In exception: (<class 'TypeError'>, TypeError("'generator' object is not subscriptable"), <traceback object at 0x00000198C937A800>)
seq = <itertools.islice object at 0x00000198C97822A0>
1
2
In exception: (<class 'StopIteration'>, StopIteration(), <traceback object at 0x00000198C937B700>)


In [35]:
# 7.6 Generators may be slower.
# This is so because looping over them requires using the Iteration Protocol, which calls __next__() at every step.
def iter_list():
    """Benchmark subject: loop over a six-item list literal, doing nothing per item."""
    for x in [0,1,2,3,4,5]:
        pass
def iter_gen():
    """Benchmark subject: loop over a six-item generator, doing nothing per item.

    The inner generator function is defined on every call, so that cost is
    part of whatever is measured when this function is timed.
    """
    def gen():
        # Yields the same six values as the list literal in iter_list.
        yield 0
        yield 1
        yield 2
        yield 3
        yield 4
        yield 5
    for x in gen():
        pass

import timeit
# Time each function with timeit's default number of runs and print the total it reports.
for label, stmt, setup in (
    ('Iteration over a list', 'iter_list()', 'from __main__ import iter_list'),
    ('Iteration over a generator', 'iter_gen()', 'from __main__ import iter_gen'),
):
    t = timeit.Timer(stmt, setup=setup)
    print(label)
    print(t.timeit())
    print('='*10)


Iteration over a list
0.3076887000352144
Iteration over a generator
0.9639634999912232


In [36]:
# 7.7 Generators may consume less memory.
# Because generators are lazy they (potentially) require much less memory than lists when dealing with large numbers of items.
# If memory is limited and single traversal is not a problem, then generators may be an appropriate solution for conserving memory.

# 7.8 Generators are always "True"
# Idiomatic Python suggests that checking if a sequence implicitly evaluates to a boolean is cleaner than casting it to a bool
# or checking that the length is greater than zero. Unlike lists, where that behavior holds, a generator always evaluates to True.
def odd_list(seq):
    """Return a list of the items of *seq* whose remainder modulo 2 is 1."""
    return [x for x in seq if x % 2 == 1]

# An empty list is falsy, so the implicit truth test reports the absence of odd numbers.
print('Found odd' if odd_list([0, 2, 4]) else 'No odd')
print(bool(odd_list([0, 2, 4])))

print('=' * 20)
def odd_gen(seq):
    """Lazily yield the items of *seq* whose remainder modulo 2 is 1."""
    yield from (x for x in seq if x % 2 == 1)

# A generator object is truthy even when it will yield nothing, so this test is misleading.
print('Found odd' if odd_gen([0, 2, 4]) else 'No odd')
print(bool(odd_gen([0, 2, 4])))
print('=' * 20)


No odd
False
Found odd
True


In [37]:
# 7.9 A non-generator: range
# The built-in function range() is *not* a generator. A few differences are below.
# 1) A range object can be both indexed and sliced (slicing returns another range object).
to_ten = range(1, 11)
third_item = to_ten[2]
print(third_item)
sub_range = to_ten[1:3]
print(sub_range)

# 2) The range type does not define __next__()
has_next = '__next__' in dir(range)
print(has_next)

# 3) A range is an iterable, not an iterator; iter() returns a separate iterator object
to_ten_iterator = iter(to_ten)
print(to_ten_iterator)

# 4) Being an iterable, a range does not exhaust and can be traversed any number of times.
first_pass = list(to_ten)
print(first_pass)
second_pass = list(to_ten)
print(second_pass)
print('=' * 20)
# range is also lazy about sequence creation, but it is not a generator. They behave slightly differently.

# 7.10 A generator in collections module: OrderedDict
# The OrderedDict class uses a generator in its __iter__ method. Because very minimal state is stored in the
# __iter__ method, it can be used to create a matrix.
from collections import OrderedDict
# Map 1, 2 and 3 to themselves, inserted in ascending order.
d = OrderedDict()
for i in range(1, 4):
    d.update({i: i})
# Iterating the same mapping in both loops still produces the full 3x3 grid of key pairs.
for x in d:
    for y in d:
        print(f'{x} {y}')


3
range(2, 4)
False
<range_iterator object at 0x00000198C9307A30>
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
1 1
1 2
1 3
2 1
2 2
2 3
3 1
3 2
3 3


In [38]:
# Ch. 8: Lists or generators?
# 8.1 Repeated use of data
#     If access to items in a sequence is needed repeatedly, a generator alone cannot provide that access. Use a list or
#     convert the generator to a list in that case.

# 8.2 Data fits in memory
#     If the complete sequence fits in memory, a list may prove to be slightly faster than a generator. Remember that
#     repeated function overhead invoked for every item in the sequence does not come for free. Test with timeit module.

# 8.3 Operating on the sequence
#     If operations on the whole sequence need to be performed, such as "len" or "reversed", then a generator is probably
#     not the right choice. While "sorted" will work on a non-infinite generator, "len" and "reversed" will fail.

# 8.4 Conversion between lists and generators
#     A list can always be converted into a generator. The reverse is *not* true - generators can be infinite. Use the
#     built-in "list" function if generator is non-infinite and memory is sufficient.
def identity_gen(seq):
    """Yield the items of *seq* unchanged, turning any iterable into a generator."""
    yield from seq

round_tripped = list(identity_gen([0, 1]))
print([0, 1] == round_tripped)

# Note in Python 3 many of the iteration semantics have become more lazy e.g. range, zip etc.
# Another example is dict.keys() and dict.values() methods. They used to return lists in Python 2.
# In Python 3, dict.keys() returns a "set-like object providing a view of the dictionary's keys".
print('='*20)
hsh = {'a': 1, 'b': 2, 'c': 3}
print(type(hsh.keys()))  # Returns "<type 'list'>" in Python 2
# A dict_keys view is not subscriptable: indexing and slicing both raise TypeError.
# Catch only that error so any unrelated failure is not silently reported as expected.
try:
    print(hsh.keys()[1])     # Returns 'c' in Python 2, but it cannot be indexed in Python 3
except TypeError:
    print('In exception!')
try:
    print(hsh.keys()[1:3])     # Returns ['c', 'b'] in Python 2, but it cannot be sliced in Python 3
except TypeError:
    print('In exception!')


True
<class 'dict_keys'>
In exception!
In exception!


In [39]:
# Ch. 9: Real World Uses

# 9.1 Database chunking
# 9.2 Recursive generators
# 9.3 Examples from text tweaking

# ============================================================================================================
# A few other real-life use cases where the lazy evaluation technique of generators could be used are as follows:
# 1) Reading large files
# 2) Unix-like piping of several tasks each of which does one and only one thing perfectly
# 3) Buffered fetch - when it is efficient to fetch data in chunks and process one chunk per iteration

