In [None]:
# Chapter 4 Iterators and Generators

Iteration is one of Python’s strongest features. At a high level, you might simply view iteration as a way to process items in a sequence. 

## 4.1 Manually Consuming an Iterator

To manually consume an iterable, use the **next()** function and write your code to catch the StopIteration exception. 

In [None]:
with open('/etc/passwd') as f:
    try:
        while True:
            line = next(f)
            print(line, end='')
    except StopIteration:
        pass


In [None]:
items = [1, 2, 3]
# Get the iterator
it = iter(items)

# Run the iterator
next(it)

next(it)

next(it)

3

## 4.2 Delegating Iteration

Typically, all you need to do is define an **__iter__()** method that delegates iteration to the internally held container.

In [None]:
class Node:
    def __init__(self, value):
        self._value = value
        self._children = []
    
    def __repr__(self):
        return 'Node({!r})'.format(self._value)
    
    def add_child(self, node):
        self._children.append(node)

    def __iter__(self):
        return iter(self._children)

# Example
if __name__ == '__main__':
    root = Node(0)
    child1 = Node(1)
    child2 = Node(2)
    root.add_child(child1)
    root.add_child(child2)
    for ch in root:
        print(ch)
        
    

Node(1)
Node(2)


## 4.3 Creating New Iteration Patterns with Generators

In [None]:
def frange(start, stop, increment):
    x = start
    while x < stop:
        yield x
        x +=increment

for n in frange(0, 4, 0.5):
    print(n)

print(list(frange(0, 1, 0.125)))

0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
[0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875]


In [None]:
def countdown(n):
    print('Starting to count from', n)
    while n > 0:
        yield n
        n -= 1
    print('Done!')   

# Create the generator, notice no output appears
c = countdown(3)
next(c)

Starting to count from 3


3

## 4.4 Implementing the Iterator Protocol

By far, the easiest way to implement iteration on an object is to use a generator function.

In [None]:
class Node:
    def __init__(self, value):
        self._value = value
        self._children = []

    def __repr__(self):
        return 'Node({!r})'.format(self._value)

    def add_child(self, node):
        self._children.append(node)

    def __iter__(self):
        return iter(self._children)

    def depth_first(self):
        yield self
        for c in self:
            yield from c.depth_first()

# Example
if __name__ == '__main__':
    root = Node(0)
    child1 = Node(1)
    child2 = Node(2)
    root.add_child(child1)
    root.add_child(child2)
    child1.add_child(Node(3))
    child1.add_child(Node(4))
    child2.add_child(Node(5))

    for ch in root.depth_first():
        print(ch)


Node(0)
Node(1)
Node(3)
Node(4)
Node(2)
Node(5)


## 4.5 Iterating in Reverse

Use the built-in **reversed()** function. 

In [None]:
a = [1, 2, 3, 4]
for x in reversed(a):
    print(x)

4
3
2
1


In [None]:
class Countdown:
    def __init__(self, start):
        self.start = start
    
    def __iter__(self)：
        n = self.start
        while n > 0:
            yield n
            n -= 1

    def __reversed__(self):
        n = 1
        while n <= self.start:
            yield n
            n += 1

In [None]:
## 4.6 Defining Generator Functions with Extra State

If you want a generator to expose extra state to the user, don’t forget that you can easily implement it as a class, putting the generator function code in the __iter__() method.

In [None]:
from collections import deque

class linehistory:
    def __init__(self, lines, hislen=3):
        self.lines = lines
        self.history = deque(maxlen=hislen)
    
    def __iter__(self):
        for lineno, line in enumerate(self.lines, 1):
            self.history.append((lineno, line))
            yield line
    
    def clear(self):
        self.history.clear()
    
f = open('sample.txt')
lines= linehistory(f)
    
it = iter(lines)
next(it)
next(it)

'this is a test'

## 4.7 Taking a Slice of an Iterator

The **itertools.islice()** function is perfectly suited for taking slices of iterators and generators.

In [None]:
import itertools
for x in itertools.islice(c, 10, 20):
    print(x)

10
11
12
13
14
15
16
17
18
19


## 4.8 Skipping the First Part of an iterable

The itertools module has a few functions that can be used to address this task. The first is the **itertools.dropwhile()** function. 

In [None]:
from itertools import dropwhile
with open('/etc/passwd') as f:
    for line in dropwhile(lambda line: line.startswith('#'), f):
        print(line, end='')

In [None]:
from itertools import islice
items = ['a', 'b', 'c', 1, 4, 10, 15]
for x in islice(items, 3, None):
    print(x)

1
4
10
15


## 4.9  Iterating Over All Possible Combinations or Permutations

The itertools module provides three functions for this task. The first of these **iter tools.permutations()**takes a collection of items and produces a sequence of tuples that rearranges all of the items into all possible permutations (i.e., it shuffles them into all possible configurations). 

In [None]:
items = ['a', 'b', 'c']
from itertools import permutations

for p in permutations(items):
    print(p)

for p in permutations(items, 2):
    print(p)

from itertools import combinations, combinations_with_replacement
for c in combinations(items, 3):
    print(c)

for c in combinations(items, 2):
    print(c)

for c in combinations(items, 1):
    print(c)

for c in combinations_with_replacement(items, 3):
    print(c)

('a', 'b', 'c')
('a', 'c', 'b')
('b', 'a', 'c')
('b', 'c', 'a')
('c', 'a', 'b')
('c', 'b', 'a')
('a', 'b')
('a', 'c')
('b', 'a')
('b', 'c')
('c', 'a')
('c', 'b')
('a', 'b', 'c')
('a', 'b')
('a', 'c')
('b', 'c')
('a',)
('b',)
('c',)
('a', 'a', 'a')
('a', 'a', 'b')
('a', 'a', 'c')
('a', 'b', 'b')
('a', 'b', 'c')
('a', 'c', 'c')
('b', 'b', 'b')
('b', 'b', 'c')
('b', 'c', 'c')
('c', 'c', 'c')


## 4.10 Iterating Over the Index-Value Paris of a Sequence



In [None]:
my_list = ['a', 'b', 'c']

for idx, val in enumerate(my_list):
    print(idx, val)

for idx, val in enumerate(my_list, 1):
    print(idx, val)

0 a
1 b
2 c
1 a
2 b
3 c


In [None]:
from collections import defaultdict

word_summary = defaultdict(list)

with open('sample.txt', 'r') as f:
    lines = f.readlines()

for idx, line in enumerate(lines):
    # Create a list of words in current line
    words = [w.strip().lower() for w in line.split()]
    for word in words:
        word_summary[word].append(idx)

print(word_summary)

defaultdict(<class 'list'>, {'hello': [0], 'world': [0], 'this': [1], 'is': [1], 'a': [1], 'test': [1]})


In [None]:
data = [(1,2), (3,4), (5,6), (7,8)]

for n, (x, y) in enumerate(data):
    print("{}: ({}, {})".format(n, x, y))

0: (1, 2)
1: (3, 4)
2: (5, 6)
3: (7, 8)


## 4.11 Iterating Over Multiple Sequences Simultaneously

To iterate over more than one sequence simultaneously, use the **zip()** function.

In [1]:
xpts = [1, 5, 4, 2, 10, 7]
ypts = [101, 78, 37, 15, 62, 99]

for x, y in zip(xpts, ypts):
    print(x, y)

a = [1, 2, 3]
b = ['w', 'x', 'y', 'z']
for i in zip(a, b):
    print(i)

from itertools import zip_longest
for  i in zip_longest(a, b, fillvalue=0):
    print(i)

1 101
5 78
4 37
2 15
10 62
7 99
(1, 'w')
(2, 'x')
(3, 'y')
(1, 'w')
(2, 'x')
(3, 'y')
(0, 'z')


In [2]:
headers = ['name', 'shares', 'price']
values = ['ACME', 100, 490.1]

for name, val in zip(headers, values):
    print(name,'=', val)

name = ACME
shares = 100
price = 490.1


In [None]:
a = [1, 2, 3]
b = [10, 11, 12]
c = ['x', 'y', 'z']
for i in zip(a, b, c):
    print(i)

# store into a list
print(list(zip(a,b)))

(1, 10, 'x')
(2, 11, 'y')
(3, 12, 'z')
[(1, 10), (2, 11), (3, 12)]


## 4.12 Iterating on Items in Separate Containers

The **itertools.chain()** method can be used to simplify this task. It takes a list of iterables as input, 
and returns an iterator that effectively masks the fact that you’re really acting on multiple containers.

In [3]:
from itertools import chain
a = [1, 2, 3, 4]
b = ['x', 'y', 'z']
for x in chain(a, b):
  print(x)

1
2
3
4
x
y
z


## 4.13 Creating Data Processing Pipelines
Generator functions are a good way to implement processing pipelines. To illustrate, suppose you have a huge directory of log files that you want to process:
        
        
        foo/
           access-log-012007.gz
           access-log-022007.gz
           access-log-032007.gz
           ...
           access-log-012008
        bar/
           access-log-092007.bz2
           ...
           access-log-022008

In [4]:
import os
import fnmatch
import gzip
import bz2
import re

def gen_find(filepat, top):
  ''' 
  Find all filenames in a directory tree that match a shell wildcard pattern
  '''
  for path, dirlist, filelist in os.walk(top):
    for name in fnmatch.filter(filelist, filepat):
      yield os.path.join(path, name)


def gen_opener(filenames):
  '''
  Open a sequence of filenames one at a time producing a file object. 
  The file is closed immediately when proceeding to the next iteration.
  '''
  for filename in filenames:
    if filename.endswith('.gz'):
      f = gzip.open(filename, 'rt')
    elif filename.endswith('.bz2'):
      f = bz2.open(filename, 'rt')
    else:
      f = open(filename, 'rt')
    yield f
    f.close()

def gen_concatenate(iterators):
  '''
  Chain a sequence of iterators together into a single sequence. 
  '''
  for it in iterators:
    yield from iter

def gen_grep(pattern, lines):
  '''
  Look for a regex pattern in a sequence of lines 
  '''
  pat = re.compile(pattern)
  for line in lines:
    if pat.search(line): 
      yield line


lognames = gen_find('access-log*', 'www')
files = gen_opener(lognames)
lines = gen_concatenate(files)
pylines = gen_grep('(?i)python', lines)
bytecolumn = (line.rsplit(None,1)[1] for line in pylines) 
bytes = (int(x) for x in bytecolumn if x != '-') 
print('Total', sum(bytes))

Total 0


## 4.14 Flattening a Nested Sequence

This is easily solved by writing a recursive generator function involving a yield from statement.

In [14]:
from collections import Iterable

def flatten(items, ignore_types=(str, bytes)):
  for x in items:
    if isinstance(x, Iterable) and not isinstance(x, ignore_types):
      yield from flatten(x)
    else:
      yield x

items = [1, 2, [3, 4, [5, 6], 7], 8]

for x in flatten(items):
  print(x)

1
2


TypeError: ignored

## 4.15 Iterating in Sorted Order Over Merged Sorted Iterables

You have a collection of sorted sequences and you want to iterate over a sorted sequence of them all merged together. The **heapq.merge()** function does exactly what you want.

In [15]:
import heapq

a = [1, 4, 7, 10]
b = [2, 5, 6, 11]

for c in heapq.merge(a, b):
  print(c)

1
2
4
5
6
7
10
11


## 4.16 Replacing Infinite while Loops with an Iterator


In [16]:
import sys
f = open('/etc/passwd')
for chunk in iter(lambda: f.read(10), ''):
  n = sys.stdout.write(chunk)


root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
mail:x:8:8:mail:/var/mail:/usr/sbin/nologin
news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
backup:x:34:34:backup:/var/backups:/usr/sbin/nologin
list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
_apt:x:100:65534::/nonexistent:/usr/sbin/nologin
systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif