# Generators and Iterators

## Building your own generators with `yield`

In [1]:
def counter(start, end):
    """Generate start, start + 1, start + 2, ... for as long as the value is below end.

    Nothing is produced when start is not below end.
    """
    value = start
    while True:
        if not value < end:
            # Upper bound reached: returning ends the generator.
            return
        yield value
        value += 1

In [2]:
counter(1, 10)

<generator object counter at 0x10d0f3b90>

In [3]:
x = counter(1,10)
x.next()

1

In [4]:
x.next()

2

In [5]:
x.next()

3

In [6]:
x = counter(1,10)
list(x)

[1, 2, 3, 4, 5, 6, 7, 8, 9]

`yield` can also be used as an expression: it evaluates to whatever value the caller passes in through the generator's `send()` method

In [7]:
def accumulator(start=0):
    """Generator that keeps a running total of the values sent into it.

    Each ``yield`` hands the current total to the caller and evaluates to the
    value the caller supplies through ``send()``; that value is then added to
    the total. The generator never finishes on its own.
    """
    total = start
    while True:
        # Pause here: emit the total, resume with whatever was sent in.
        received = yield total
        total += received

In [8]:
x = accumulator()
x.next()

0

In [9]:
x.send(1)

1

In [10]:
x.send(1)

2

In [11]:
x.send(10)

12

## The iterator protocol

What does `for x in sequence:` *really* do?

In [12]:
seq = range(4)
for x in seq: print x

0
1
2
3


In [13]:
iter_seq = iter(seq)
print iter_seq

<listiterator object at 0x10d0e0050>


In [14]:
# Spell out by hand the steps a `for x in seq:` loop performs.
iter_seq = iter(seq)  # ask the sequence for an iterator
try:
    while True:
        # Each call to next() hands back one item...
        x = iter_seq.next()
        print x
except StopIteration:
    # ...and exhaustion is signalled by StopIteration, which ends the loop.
    pass

0
1
2
3


Generators are their own iterators:

In [15]:
print counter(0, 4)
print iter(counter(0, 4))

<generator object counter at 0x10d0f3fa0>
<generator object counter at 0x10d0f3fa0>


In [16]:
for item in counter(0, 4): print item

0
1
2
3


We can also define our own iterator classes (though generators are usually more readable):

In [17]:
class Counter(object):
    """Iterable over the values start, start + 1, ... that are below end.

    Only the bounds are stored here; every call to ``__iter__`` builds a new
    ``CounterIterator`` from them, so one Counter can be iterated repeatedly.
    """

    def __init__(self, start, end):
        self._start, self._end = start, end

    def __iter__(self):
        # Hand out a fresh iterator that starts again from the stored bounds.
        first, limit = self._start, self._end
        return CounterIterator(first, limit)
    
class CounterIterator(object):
    """Iterator producing start, start + 1, ... while the value is below end.

    Implements the full iterator protocol: ``__iter__`` returns the iterator
    itself, and ``next`` (aliased as ``__next__``) returns the next value or
    raises ``StopIteration`` once the end bound is reached.
    """

    def __init__(self, start, end):
        self._cur = start
        self._end = end

    def __iter__(self):
        # An iterator must itself be iterable (returning itself), so that it
        # can be passed to iter(), used directly in a for loop, etc.
        return self

    def next(self):
        """Return the next value; raise StopIteration when exhausted."""
        if self._cur < self._end:
            result = self._cur
            self._cur += 1
            return result
        # Exhausted: leave _cur untouched so repeated calls keep raising
        # without advancing the position past the end bound.
        raise StopIteration

    # Python 3 looks up `__next__` rather than `next`; providing both names
    # lets the class work as an iterator under either version.
    __next__ = next

ctr = Counter(0, 5)
print list(ctr)    

[0, 1, 2, 3, 4]


## List comprehensions

If you thought the functionality of `map` and `filter` was great, but you didn't like defining tons of little functions, you're going to *love* Python's list comprehensions:

In [19]:
[ x*2 for x in range(4) ]

[0, 2, 4, 6]

In [20]:
lst = [ ]
for x in range(4):
    lst.append(x*2)
lst

[0, 2, 4, 6]

In [21]:
[ (x,y) for x in range(4) for y in range(4) ]

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3)]

In [22]:
[ [ (r,c) for c in range(4) ]
  for r in range(4) ]

[[(0, 0), (0, 1), (0, 2), (0, 3)],
 [(1, 0), (1, 1), (1, 2), (1, 3)],
 [(2, 0), (2, 1), (2, 2), (2, 3)],
 [(3, 0), (3, 1), (3, 2), (3, 3)]]

In [24]:
[ x * 4
  for x in range(10) 
  if x % 2 == 0 
  if x % 3 == 0 ]

[0, 24]

## Generator expressions

In [26]:
[ x for x in range(10) if x % 2 == 0 ]

[0, 2, 4, 6, 8]

In [27]:
( x for x in range(10) if x % 2 == 0 )

<generator object <genexpr> at 0x10d11d910>

In [28]:
gen = ( x for x in range(10) if x % 2 == 0 )

In [29]:
gen.next()

0

In [30]:
gen.next()

2

In [31]:
list(gen)

[4, 6, 8]

### Exercises

- Write a generator that will yield the nodes of a tree and their depth in post-order
- Write a loop that uses that generator to *print* the nodes of a tree in post-order

## The `itertools` module

`itertools` provides a number of "higher-order iterators" that allow you to combine iterators in interesting ways.

In [32]:
from itertools import chain, izip, count, groupby

In [33]:
# chain links multiple iterators end-to-end
xs = range(10)
ys = 'abcdef'
list(chain(xs, ys))


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c', 'd', 'e', 'f']

In [34]:
# izip lets us "iteratively zip" multiple iterators. Useful when building a giant dictionary:
import string
dict(izip(string.lowercase, string.uppercase[:10]))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J'}

In [35]:
# count() gives us a simple iterator of consecutive values

for i, letter in izip(count(), string.letters[:10]):
    print i, letter

0 A
1 B
2 C
3 D
4 E
5 F
6 G
7 H
8 I
9 J


In [36]:
for i, letter in enumerate(string.letters[:10]):
    print i, letter

0 A
1 B
2 C
3 D
4 E
5 F
6 G
7 H
8 I
9 J


`groupby()` allows us to efficiently group consecutive values from an iterator into sub-iterators that share a common key. For instance, we might have
some datetime-based data that we wish to convert to date-based data:

In [37]:
from random import random
from datetime import datetime, timedelta

# Build one (timestamp, value) pair per hour, in chronological order,
# starting at 2016-04-24 00:00 and stopping before 2016-04-27 00:00.
trades = []
dt = datetime(2016, 4, 24)
while dt < datetime(2016,4,27):
    trades.append((dt, random()))  # the value is whatever random() returns
    dt += timedelta(hours=1)
    
print len(trades)

72


In [39]:
def day_of_trade(val):
    """Key function: map a ``(datetime, value)`` pair to the datetime's date."""
    timestamp, _value = val
    return timestamp.date()

# groupby() yields (key, group) pairs, where each group is a lazy iterator
# over the consecutive items sharing that key; list() consumes the group so
# that its items can be counted.
for date, date_trades in groupby(trades, key=day_of_trade):
    print date, len(list(date_trades))


2016-04-24 24
2016-04-25 24
2016-04-26 24


In [44]:
for date, date_trades in groupby(trades, key=day_of_trade):
    date_trades = list(date_trades)
    print date, sum(v for dt, v in date_trades) / len(list(date_trades))    


2016-04-24 0.544759783227
2016-04-25 0.525789601446
2016-04-26 0.467047127969


**Note:** your data *must* already be sorted in a "grouped" order (all items that share a key adjacent to one another) if you use `groupby`. If you wish to group *unsorted* data, either sort it by the same key first or use a `defaultdict` instead.