# Generators

## Yielding

When Python compiles a function whose body contains a `yield` statement, it treats that function as a generator function.
- Calling the function does not run any of its body; it immediately returns a generator object. No output given.
- When calling `next()` on the generator, the body runs up to the next `yield`: all output produced along the way is printed and whatever is in the yield statement gets returned.
- The generator is suspended there until `next()` gets called again on it.
- It all stops when either a `return` is encountered or the execution finishes (a `StopIteration` exception gets raised with the value of the return).

It is important to note that Python now considers the function with the `yield` statement as a generator factory, as calling the function returns a generator object.    

Also, generators are actually iterators, and they implement the iterator protocol. (`__iter__` and `__next__` are implemented).

In [11]:
# this is an iterator (implements the iterator protocol)
import math 

print("---- ITERATOR PROTOCOL ----")

class FactIter:
  """Iterator producing 0!, 1!, ..., (n-1)!, one value per `next()` call."""

  def __init__(self, n):
    self.n = n  # total number of factorials to produce
    self.i = 0  # argument of the next factorial to produce

  def __iter__(self):
    # Iterator protocol: an iterator returns itself from __iter__.
    return self

  def __next__(self):
    # Guard clause: all n values were already handed out.
    if self.i >= self.n:
      raise StopIteration
    value = math.factorial(self.i)
    self.i += 1
    return value

fact_iter = FactIter(5)
print(f"the iterator fact_iter: {fact_iter}")
for item in fact_iter:
  print(item)

print("---- ITER SECOND FORM")

def fact():
  """Return a zero-argument callable whose k-th call (counting from 0) returns k!.

  The call counter lives in the enclosing scope, so every callable returned
  by `fact()` counts independently.
  """
  calls = 0  # how many times the returned callable has been invoked

  def _fact():
    nonlocal calls
    value = math.factorial(calls)
    calls += 1
    return value

  return _fact

f = fact()
fact_iter_2 = iter(f, math.factorial(5))

print(f"the iterator fact_iter_2: {fact_iter_2}")
for item in fact_iter_2:
  print(item)

# The two approaches above both create iterators, but they take a lot of code.
print("---- GENERATOR FUNCTION ----")

def fact(n):
  """Generator function yielding 0!, 1!, ..., (n-1)!.

  When the values run out the generator returns 'Done!'; that value travels
  on the StopIteration exception raised by the final `next()` call.
  """
  for value in map(math.factorial, range(n)):
    yield value
  return 'Done!'

f_gen = fact(5)
print(f"f_gen is now a generator function: {f_gen}")
print(f"f_gen has the __iter__ method: {'__iter__' in dir(f_gen)}")
print(f"f_gen has the __next__ method: {'__next__' in dir(f_gen)}")
print(f"iter(f_gen) returns f_gen: {iter(f_gen) == f_gen}")

for i in f_gen:
  print(i)

---- ITERATOR PROTOCOL ----
the iterator fact_iter: <__main__.FactIter object at 0x7f2e8961ac10>
1
1
2
6
24
---- ITER SECOND FORM
the iterator fact_iter_2: <callable_iterator object at 0x7f2e896bea90>
1
1
2
6
24
---- GENERATOR FUNCTION ----
f_gen is now a generator function: <generator object fact at 0x7f2e89657750>
f_gen has the __iter__ method: True
f_gen has the __next__ method: True
iter(f_gen) returns f_gen: True
1
1
2
6
24


In [24]:
from timeit import timeit
from functools import lru_cache
# Fibonacci sequence example.
print("---- RECURSIVE CAVEATS ----")
def fib_recursive(n):
  """Doubly-recursive Fibonacci with fib(0) == fib(1) == 1.

  Every call with n > 1 makes two further recursive calls, so the same
  sub-results are recomputed many times.
  """
  # Base case: every n <= 1 maps to 1.
  if n <= 1:
    return 1
  return fib_recursive(n - 1) + fib_recursive(n - 2)

print(f"fib_recursive(28) {fib_recursive(28)} takes: {timeit('fib_recursive(28)', globals=globals(), number=10)}s")
print(f"fib_recursive(29) {fib_recursive(29)} takes: {timeit('fib_recursive(29)', globals=globals(), number=10)}s")
print(f"fib_recursive(30) {fib_recursive(30)} takes: {timeit('fib_recursive(30)', globals=globals(), number=10)}s")

print("---- LRU CACHE FIX ----")

@lru_cache()
def fib_recursive(n):
  """Recursive Fibonacci (fib(0) == fib(1) == 1) memoised with lru_cache.

  Cached results are reused instead of being recomputed; the recursion depth
  for an uncached n still grows with n.
  """
  return 1 if n <= 1 else fib_recursive(n - 1) + fib_recursive(n - 2)

print(f"fib_recursive(28) {fib_recursive(28)} takes: {timeit('fib_recursive(28)', globals=globals(), number=10)}s")
print(f"fib_recursive(29) {fib_recursive(29)} takes: {timeit('fib_recursive(29)', globals=globals(), number=10)}s")
print(f"fib_recursive(30) {fib_recursive(30)} takes: {timeit('fib_recursive(30)', globals=globals(), number=10)}s")

# However for this approach we will eventually hit the recursion max depth,
# we could modify it but it is not a good practice.
try:
  fib_recursive(2000)
except RecursionError:
  print("No more recursion Please!!")

print("---- NON RECURSIVE ----")
def fib(n):
  """Return the n-th Fibonacci number iteratively (fib(0) == fib(1) == 1)."""
  previous, current = 1, 1
  # n - 1 update steps; for n <= 1 the range is empty and 1 is returned.
  for _ in range(n - 1):
    previous, current = current, previous + current
  return current

print(f"fib(2800) takes: {timeit('fib(2800)', globals=globals(), number=10)}s")
print(f"fib(2900) takes: {timeit('fib(2900)', globals=globals(), number=10)}s")
print(f"fib(3000) takes: {timeit('fib(3000)', globals=globals(), number=10)}s")

---- RECURSIVE CAVEATS ----
fib_recursive(28) 514229 takes: 1.1370543579996593s
fib_recursive(29) 832040 takes: 1.7931512650002333s
fib_recursive(30) 1346269 takes: 2.9065953859999354s
---- LRU CACHE FIX ----
fib_recursive(28) 514229 takes: 1.6399999367422424e-06s
fib_recursive(29) 832040 takes: 1.7000002117129043e-06s
fib_recursive(30) 1346269 takes: 1.5400000847876072e-06s
No more recursion Please!!
---- NON RECURSIVE ----
fib(2800) takes: 0.003225140000722604s
fib(2900) takes: 0.004588750000038999s
fib(3000) takes: 0.003469000000222877s
---- ITERATOR ----
fib_iter: <__main__.FibIter object at 0x7f2e896eb950> iterator


In [37]:
print("---- ITERATOR ----")

def fib(n):
  """Return the n-th Fibonacci number iteratively (fib(0) == fib(1) == 1)."""
  pair = (1, 1)  # (previous value, current value)
  for _ in range(n - 1):
    # Slide the window one step along the sequence.
    pair = (pair[1], pair[0] + pair[1])
  return pair[1]

class FibIter:
  """Iterator over fib(0), fib(1), ..., fib(n-1), using the module-level `fib`."""

  def __init__(self, n):
    self.n = n  # total number of values to produce
    self.i = 0  # index of the next Fibonacci number to produce

  def __iter__(self):
    # Iterator protocol: an iterator returns itself from __iter__.
    return self

  def __next__(self):
    if self.i >= self.n:
      raise StopIteration
    # This is not good: fib(self.i) recalculates the whole sequence from the
    # start every time we pass through here.
    value = fib(self.i)
    self.i += 1
    return value

fib_iter = FibIter(15)
print(f"fib_iter: {fib_iter} iterator")

for i, fib_num in enumerate(fib_iter):
  print(f"fib number {i} is {fib_num}")

print(f"takes fib_iter: {timeit('list(FibIter(5000))', globals=globals(), number=1)}'s to finish")

print("---- GENERATOR ----")

def fib(n):
  """Generator function yielding the first n Fibonacci numbers (1, 1, 2, 3, ...).

  Exactly n values are produced for every n, including n == 0 (nothing is
  yielded) and n == 1 (a single 1). The earlier version yielded the first
  two values unconditionally, so it produced two values for n < 2.
  """
  fib_0, fib_1 = 1, 1
  for _ in range(n):
    yield fib_0
    # Advance the pair only after yielding, so each value is computed once.
    fib_0, fib_1 = fib_1, fib_0 + fib_1

fib_gen = fib(20)
print(f"fib_gen: {fib_gen} generator")

for i, fib_num in enumerate(fib_gen):
  print(f"fib number {i} is {fib_num}")

print(f"takes fib_gen: {timeit('list(fib(5000))', globals=globals(), number=1)}'s to finish")

---- ITERATOR ----
fib_iter: <__main__.FibIter object at 0x7f2e896be950> iterator
fib number 0 is 1
fib number 1 is 1
fib number 2 is 2
fib number 3 is 3
fib number 4 is 5
fib number 5 is 8
fib number 6 is 13
fib number 7 is 21
fib number 8 is 34
fib number 9 is 55
fib number 10 is 89
fib number 11 is 144
fib number 12 is 233
fib number 13 is 377
fib number 14 is 610
takes fib_iter: 1.5531883300000118's to finish
---- GENERATOR ----
fib_gen: <generator object fib at 0x7f2e894f3a50> generator
fib number 0 is 1
fib number 1 is 1
fib number 2 is 2
fib number 3 is 3
fib number 4 is 5
fib number 5 is 8
fib number 6 is 13
fib number 7 is 21
fib number 8 is 34
fib number 9 is 55
fib number 10 is 89
fib number 11 is 144
fib number 12 is 233
fib number 13 is 377
fib number 14 is 610
fib number 15 is 987
fib number 16 is 1597
fib number 17 is 2584
fib number 18 is 4181
fib number 19 is 6765
takes fib_gen: 0.0010065699998449418's to finish


## Iterable from Generator

We can use a generator (which is itself a lazy iterator) to create an iterable.
- By returning a new generator object (the result of calling the generator function) from the iterable's `__iter__` method.

In [5]:
class Squares:
  """Iterable over the squares 0**2, 1**2, ..., (n-1)**2."""

  def __init__(self, n):
    self.n = n  # number of squares each iteration produces

  @staticmethod
  def square_gen(n):
    """Generator function yielding base ** 2 for each base in range(n)."""
    for base in range(n):
      yield base ** 2

  def __iter__(self):
    # Each call builds a new generator, so the same Squares object can be
    # iterated more than once.
    return Squares.square_gen(self.n)

# we can now have an iterable squares.
sq = Squares(5)
print(f"Squares: {sq} is an iterable")

for i, val in enumerate(sq):
  print(f"position: {i} has value: {val}")

Squares: <__main__.Squares object at 0x7f3f0f2ebc50> is an iterable
position: 0 has value: 0
position: 1 has value: 1
position: 2 has value: 4
position: 3 has value: 9
position: 4 has value: 16


In [12]:
# Example Card Deck
from collections import namedtuple

Card = namedtuple("Card", "rank suit")
class CardDeck:
  """Iterable deck of cards that supports both `iter()` and `reversed()`.

  Cards are generated on demand, one Card per (rank, suit) combination.
  """

  SUITS = ('Spades', 'Hearts', 'Diamonds', 'Clubs')
  RANKS = tuple(range(2, 11)) + tuple('JQKA')

  @staticmethod
  def card_gen():
    """Yield the cards suit by suit, ranks in RANKS order within each suit."""
    yield from (Card(rank, suit)
                for suit in CardDeck.SUITS
                for rank in CardDeck.RANKS)

  @staticmethod
  def reversed_card_gen():
    """Yield the cards in exactly the opposite order of `card_gen`."""
    yield from (Card(rank, suit)
                for suit in reversed(CardDeck.SUITS)
                for rank in reversed(CardDeck.RANKS))

  def __iter__(self):
    # A new generator per call: the deck can be iterated repeatedly.
    return CardDeck.card_gen()

  def __reversed__(self):
    # Hook used by the built-in reversed().
    return CardDeck.reversed_card_gen()

c1 = CardDeck()

print("---- FORWARD ----")
for i in c1:
  print(i)

print("---- BACKWARD ----")
for i in reversed(c1):
  print(i)

---- FORWARD ----
Card(rank=2, suit='Spades')
Card(rank=3, suit='Spades')
Card(rank=4, suit='Spades')
Card(rank=5, suit='Spades')
Card(rank=6, suit='Spades')
Card(rank=7, suit='Spades')
Card(rank=8, suit='Spades')
Card(rank=9, suit='Spades')
Card(rank=10, suit='Spades')
Card(rank='J', suit='Spades')
Card(rank='Q', suit='Spades')
Card(rank='K', suit='Spades')
Card(rank='A', suit='Spades')
Card(rank=2, suit='Hearts')
Card(rank=3, suit='Hearts')
Card(rank=4, suit='Hearts')
Card(rank=5, suit='Hearts')
Card(rank=6, suit='Hearts')
Card(rank=7, suit='Hearts')
Card(rank=8, suit='Hearts')
Card(rank=9, suit='Hearts')
Card(rank=10, suit='Hearts')
Card(rank='J', suit='Hearts')
Card(rank='Q', suit='Hearts')
Card(rank='K', suit='Hearts')
Card(rank='A', suit='Hearts')
Card(rank=2, suit='Diamonds')
Card(rank=3, suit='Diamonds')
Card(rank=4, suit='Diamonds')
Card(rank=5, suit='Diamonds')
Card(rank=6, suit='Diamonds')
Card(rank=7, suit='Diamonds')
Card(rank=8, suit='Diamonds')
Card(rank=9, suit='Diamond

## Generator Expressions

We can have a generator returned by creating a generator expression.
- It is created by using `()`.
- It returns a Generator Object (iterator).
- It implements Lazy evaluation, the value is calculated when needed and tossed after it is finished.
- It can access nonlocal and global scope (it is a function).

In [None]:
# Let's decompose a list comprehension
import dis

exp = compile("[i**2 for i in range(5)]", filename='string', mode='eval')

dis.dis(exp)

# We see how it creates a function.

```
  1           0 LOAD_CONST               0 (<code object <listcomp> at 0x7f3f0f292270, file "string", line 1>)
              2 LOAD_CONST               1 ('<listcomp>')
              4 MAKE_FUNCTION            0
              6 LOAD_NAME                0 (range)
              8 LOAD_CONST               2 (5)
             10 CALL_FUNCTION            1
             12 GET_ITER
             14 CALL_FUNCTION            1
             16 RETURN_VALUE
```

In [None]:
# Let's decompose a generator expression
import dis

exp = compile("(i**2 for i in range(5))", filename='string', mode='eval')

dis.dis(exp)

# We see how it creates a function, and that the bytecode is almost identical
# to that of a list comprehension.

```
  1           0 LOAD_CONST               0 (<code object <genexpr> at 0x7f3f0f289930, file "string", line 1>)
              2 LOAD_CONST               1 ('<genexpr>')
              4 MAKE_FUNCTION            0
              6 LOAD_NAME                0 (range)
              8 LOAD_CONST               2 (5)
             10 CALL_FUNCTION            1
             12 GET_ITER
             14 CALL_FUNCTION            1
             16 RETURN_VALUE
```

Just like list comprehensions we can nest generator expressions.
- we get a generator of generators, so it might not be as straightforward.
- once we loop through it we need to recreate the generator expression again since it is exhausted.

In [1]:
start = 1
stop = 10

print("---- GENERATOR OF GENERATORS ----")
mult_gen = ((i*j for j in range(start, stop+1)) for i in range(start, stop+1))
print(f'mult_gen is a generator object: {mult_gen}')

[list(row) for row in mult_gen]

print("---- GENERATOR OF COMPREHENSIONS ----")
mult_gen = ([i*j for j in range(start, stop+1)] for i in range(start, stop+1))
print(f'mult_gen is a generator object: {mult_gen}')

[row for row in mult_gen]
# it does not calculate all comprehension until they are needed in the loop.

---- GENERATOR OF GENERATORS ----
mult_gen is a generator object: <generator object <genexpr> at 0x7f1bf2ba9450>
---- GENERATOR OF COMPREHENSIONS ----
mult_gen is a generator object: <generator object <genexpr> at 0x7f1bf2ba9e50>


[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80],
 [9, 18, 27, 36, 45, 54, 63, 72, 81, 90],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]

Pascal Triangle Example

```
1
1 1
1 2 1
1 3 3 1
1 4 6 4 1
```

we just need to know how to calculate combinations:
```
C(n, k) = n! / (k! (n-k)!)
```

* row 0, column 0: n=0, k=0: c(0, 0) = 0! / 0! 0! = 1/1 = 1
* row 4, column 2: n=4, k=2: c(4, 2) = 4! / 2! 2! = 4x3x2 / 2x2 = 6

In other words, we need to calculate the following list of lists:
```
c(0,0)
c(1,0) c(1,1)
c(2,0) c(2,1) c(2,2)
c(3,0) c(3,1) c(3,2) c(3,3)
...
```

In [16]:
from math import factorial

def combo(n, k):
  """Return the number of combinations C(n, k) = n! / (k! (n-k)!)."""
  numerator = factorial(n)
  denominator = factorial(k) * factorial(n - k)
  # Integer division keeps the result an int.
  return numerator // denominator

size = 600

# Let's check timings on the different approaches.
from timeit import timeit

print("---- LIST COMPREHENSION / LIST COMPREHENSION ----")
t = timeit("[[combo(n, k) for k in range(n+1)] for n in range(size+1)]", globals=globals(), number=1)
print(f"List comprehension of list comprehension takes: {t} sec")

# Calculated everything onto memory

print("---- GENERATOR EXPRESSION / GENERATOR EXPRESSION ----")
t = timeit("((combo(n, k) for k in range(n+1)) for n in range(size+1))", globals=globals(), number=1)
print(f"generator expression of generator expression takes: {t} sec")

# This one did not calculate all combo calls.

print("---- GENERATOR EXPRESSION / LIST COMPREHENSION ----")
# Outer generator expression wrapping an inner *list comprehension*. The timed
# expression used to be a copy of the generator-of-generators one above, so
# this case was never actually measured.
t = timeit("([combo(n, k) for k in range(n+1)] for n in range(size+1))", globals=globals(), number=1)
print(f"generator expression of list comprehensions takes: {t} sec")

# This one did not calculate all combo calls. the inner comprehension has not
# been evaluated.

# Let's now evaluate the timing by actually evaluating the inside
# expression/comprehension.

import tracemalloc

def pascal_list(size):
  """Build rows 0..size of Pascal's triangle as a list of lists, visit every
  entry, then print the size in bytes of the first tracemalloc statistic
  (grouped by line number)."""
  triangle = [[combo(n, k) for k in range(n+1)] for n in range(size+1)]
  for row in triangle:
    for _ in row:
      pass
  # The snapshot is taken inside the function, while `triangle` is still
  # referenced, so its memory has not been released yet.
  top_stat = tracemalloc.take_snapshot().statistics('lineno')[0]
  print(top_stat.size, 'bytes')

def pascal_gen(size):
  """Walk rows 0..size of Pascal's triangle through a generator of generators,
  visiting every entry, then print the size in bytes of the first tracemalloc
  statistic (grouped by line number)."""
  lazy_rows = ((combo(n, k) for k in range(n+1)) for n in range(size+1))
  for row in lazy_rows:
    for _ in row:
      pass
  # The snapshot is taken inside the function, before its locals go away,
  # so memory does not get released first.
  top_stat = tracemalloc.take_snapshot().statistics('lineno')[0]
  print(top_stat.size, 'bytes')

tracemalloc.stop()
tracemalloc.clear_traces()
tracemalloc.start()


print("---- PASCAL LIST ----")
t = timeit("pascal_list(size)", globals=globals(), number=1)
print(f"pascal_list takes: {t} sec")

print("---- PASCAL COMPREHENSION ----")
t = timeit("pascal_gen(size)", globals=globals(), number=1)
print(f"pascal_gen takes: {t} sec")

# When iterating over all elements there is NOT an actual difference in time
# between the two; however, the generator version is a lot more memory
# efficient, since it instantiates each element and tosses it when it is done.

---- LIST COMPREHENSION / LIST COMPREHENSION ----
List comprehension of list comprehension takes: 2.2828103340000325 sec
---- GENERATOR EXPRESSION / GENERATOR EXPRESSION ----
generator expression of generator expression takes: 3.420999973968719e-06 sec
---- GENERATOR EXPRESSION / LIST COMPREHENSION ----
generator expression of list comprehensions takes: 3.3090000215452164e-06 sec
---- PASCAL LIST ----
11499756 bytes
pascal_list takes: 6.111028124999848 sec
---- PASCAL COMPREHENSION ----
9248 bytes
pascal_gen takes: 5.824173759999894 sec


## Yield From

We can delegate the yielding to another iterator: when a generator would only loop over an iterable in order to yield each of its items, `yield from` lets that iterable produce the values directly.

In [21]:
def matrix(n):
  """Return a lazy table of products as a generator of row generators.

  Row i (for i in 1..n) yields i*j for j in 0..n; nothing is computed until
  the rows are iterated.
  """
  return ((i * j for j in range(n + 1)) for i in range(1, n + 1))

# remember that matrix() returns a generator of generators.
def matrix_iter(n):
  """Yield every value of `matrix(n)` as one flat stream, row after row."""
  for products in matrix(n):
    # Each row is itself an iterator: delegate the yielding to it.
    yield from products

for i in matrix_iter(3):
  print(i)


# we could use this for different files that we need to iterate 
def gen_clean_data(file):
  """Lazily yield each line of the file at `file`, with newline characters
  stripped from both ends of the line."""
  with open(file) as handle:
    # Yielding inside the `with` block keeps the file open between values.
    for line in handle:
      cleaned = line.strip('\n')
      yield cleaned

def files_iter(*files):
  """Yield the cleaned lines of every given file, one file after another."""
  for path in files:
    # Delegate to the per-file generator; only one file is read at a time.
    yield from gen_clean_data(path)

0
1
2
3
0
2
4
6
0
3
6
9
