# 迭代器、生成器
* 生成器函数作深度遍历
yield from 语法的伪代码，伪代码的对象是生成器`代理端`；

RESULT = yield from EXPR
```
_i = iter(EXPR)
try:
  _y = next(_i)
catch StopIteration:
  _r = e.value
else:
  while 1:
    _s = yield _y
    try:
      _y = _i.send(_s)
    catch StopIteration as _e:
      _r = _e.value
      break

RESULT = _r

```


In [5]:
class Node:
    def __init__(self, value):
        self._value = value
        self._children = []
    
    def add_child(self, node):
        self._children.append(node)
    
    def __repr__(self):
        return 'Node{!r}'.format(self._value)
    
    def __iter__(self):
        return iter(self._children)
    
    def deep_first(self):
        yield self
        for i in self:
            yield from i.deep_first()

root = Node(0)
child1 = Node(1)
child2 = Node(2)
child3 = Node(3)
child4 = Node(4)
child5 = Node(5)


root.add_child(child1)
root.add_child(child2)

child1.add_child(child3)
child1.add_child(child4)

child2.add_child(child5)


for ch in root.deep_first() :
    print(ch)

Node0
Node1
Node3
Node4
Node2
Node5


## 生成器的两者结构方式
* generator expression
* generator function

> Calling a generator function creates a generator object
.However,it does not start running the function 

In [9]:
def countdown(n):
    print('Counting down from ',n)
    while n > 0:
        yield n
        n -= 1

# 此时 countdown 并没有执行代码块

x = countdown(5)

# 激活 generator 执行
x.__next__()

Counting down from  5


5

# QA
## 1.在 PythonCookbook 通过读取日志文件的生成器管道的例子，为什么会使用到 `yield from` 语句；
```
    # 使用 yield from 
    def gen_concatenate(iterators):
        for it in iterators:
            yield from it
```

因为生成器是需要通过__next__来驱动，yield from 类似精简了其中一个 for 循环,等价于：
```
    def gen_concatenate(iterators):
        for it in iterators:
            for item in it:
                yield it
```

如果不用 yield from 来驱动内层生成器 it，则会导致 gen_concatenate 产出的是迭代器对象；

通过下面 chain 的自定义实现来说明：，它输出的内容是‘['ABC', (0, 1, 2, 3)]’ 原始的可迭代对象，我们预期的是输出 ['A','B','C',0,1,2,3]
```
    def chain(*iterables):
        for it in iterables:
            yield it
    ​
    s = 'ABC'
    t = tuple(range(4))
    ​
    list(chain(s,t))
    ['ABC', (0, 1, 2, 3)]
```

``` 
    def chain(*iterators):
        for it in iterators:
            yield from it
```
等价于：
``` 
    def chain(*iterators):
        for its in iterators:
            for it in ites:
                yield from it
```

> 当生成器返回的还是生成器时，需要考虑使用 `yield from` 驱动生成器执行


In [16]:
def chain(*iterables):
    for it in iterables:
        yield it

s = 'ABC'
t = tuple(range(4))

def chain2(*iterables):
    for it in iterables:
        yield from it
list(chain2(s,t))

['A', 'B', 'C', 0, 1, 2, 3]

> 将一个文件内容分别写入多个文件中

In [80]:
import random


PATH = '/Users/dengjiaying/Project/Py/exercise/fluentPy/data/log'
def gen_open(paths):
    for path in paths:
        with open(path,'rt') as f:
            yield f

def gen_cat(files):
    for file in files:
        yield from file

        
def gen_file():
    suffix = range(10)
    prefix = ['access-log'] * len(suffix)
    return [open('{}{}.{}'.format(PATH,pre,suf),'wt') for pre,suf in zip(prefix, suffix)]
    
def gen_shuffle(lines):
    randrange = random.randrange
    outs = gen_file()
    for line in lines:
        idx = randrange(10)
        print(line, file=outs[idx])

In [79]:
paths = Path('/Users/dengjiaying/Project/Py/exercise/fluentPy/data/').rglob('access*')
files = gen_open(paths)
lines = gen_cat(files)
gen_shuffle(lines)

In [63]:
random.randrange(10)

7

In [12]:
f = open('/Users/dengjiaying/Project/Py/statistics/colors.py')
for line in f:
    line.rsplit??

[0;31mSignature:[0m [0mline[0m[0;34m.[0m[0mrsplit[0m[0;34m([0m[0msep[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mmaxsplit[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a list of the words in the string, using sep as the delimiter string.

  sep
    The delimiter according which to split the string.
    None (the default value) means split according to any whitespace,
    and discard empty strings from the result.
  maxsplit
    Maximum number of splits to do.
    -1 (the default value) means no limit.

Splits are done starting at the end of the string and working to the front.
[0;31mType:[0m      builtin_function_or_method


> `yield from` Concatenate items from one or more source into a single sequence of items

In [98]:
# for 循环处理多个 sources 迭代对象
def gen_cat(sources):
    for src in sources:
        yield src
#         for item in src:
#             yield item

def gen_cat2(sources):
    for src in sources:
        yield from src

> `yield from` used to delegate iteration

In [16]:
def count_down(n):
    while n > 0:
        yield n
        n -= 1
def count_up(stop):
    n = 1
    while(n < stop):
        yield n
        n += 1
        
def up_and_down(n):
    yield from count_up(n)
    yield from count_down(n)

In [19]:
a = [1,2,3,4]
b = (x**2 for x in a)

In [33]:
s = '81.107.39.38 - ... "GET /ply/ply.html HTTP/1.1" 200 97238'
s.rsplit(None,1)[1]

'97238'

In [99]:
with open('/Users/dengjiaying/Project/Py/exercise/fluentPy/data/log/access-log') as wwwlog:
    bytecolumn = (line.rsplit(None,1)[1] for line in wwwlog)
    byte_sent  = (int(x) for x in bytecolumn if x != '-')
    print("Total {} b".format(sum(byte_sent)))

Total 4220541399 b


In [96]:
import random

suffix = range(10)
prefix = ['access-log'] * len(suffix)

filenames = ('{}.{}'.format(pre,suf) for (pre,suf) in zip(prefix,suffix))
with open('/Users/dengjiaying/Project/Py/exercise/fluentPy/data/log/access-log') as wwwlog:
        

SyntaxError: invalid syntax (<ipython-input-96-8774e09c6491>, line 8)

In [12]:
import re
from pathlib import Path
# paths = Path('/Users/dengjiaying/Project/Py/statistics/data').rglob('req*')

def gen_open(paths):
    for path in paths:
        with open(path,'rt') as f:
#         f = open(path, 'rt')
            yield f
        
        
def gen_cat(sources):
    for src in sources:
        yield from src

def gen_grep(pat,lines):
    patc = re.compile(pat)
    return (line for line in lines if patc.search(line))

In [None]:
paths = Path('/Users/dengjiaying/Project/Py/exercise/fluentPy/data/log/').rglob('access*')
logfiles = gen_open(paths)
lines = gen_cat(logfiles)
# patlines = gen_grep('2622',loglines)



for item in lines:
    print(item)
# bytecolumn = (line.rsplit(None,1)[1] for line in lines)
# byte_sent  = (int(x) for x in bytecolumn if x != '-')
# print("Total {} b".format(sum(byte_sent)))

10.150.0.15 - - [12/Aug/2020:00:00:08 +0800] "GET /udata/u.gif?h=654&w=360&ct=1597161607204&si=300011960346&ad=&cu=dev.coc.10086.cn&v=1.0&s=1597161604068840485&f=2&c=1428456744583&cp=https%3A%2F%2Fdev.coc.10086.cn%2Fcoc%2Fweb%2Fcoc2020%2FheartChoose%2F%3FchannelId%3DC00004001111&fp=&t=5459&st=1 HTTP/1.1" 200 5







10.150.0.15 - - [12/Aug/2020:00:00:08 +0800] "GET /udata/u.gif?h=636&w=360&ct=1597161608473&si=%40id%40&cu=apiserv.cmicrwx.cn&v=1.2&s=1597161608461949430&udtm_compaign=&channel=&f=1&c=1428456744583 HTTP/1.1" 200 5



10.150.0.15 - - [12/Aug/2020:00:00:24 +0800] "GET /udata/udata.js?aid=300011876709 HTTP/1.1" 200 21682











10.150.0.15 - - [12/Aug/2020:00:00:55 +0800] "POST /udata/u.gif? HTTP/1.1" 200 5







10.150.0.15 - - [12/Aug/2020:00:01:24 +0800] "GET /udata/u.gif?h=725&w=414&ct=1597161684402&si=300011958696&cu=apiserv.cmicrwx.cn&v=1.2&s=1597161684086779021&udtm_compaign=&channel=null&f=2&c=1428456744583&a=%3DIzMzAzMyYTMxUTM&cp=https%3A%2F%2Fapiserv.cmicrwx.cn

In [14]:
for item in patlines:
    print(item)

TypeError: expected string or bytes-like object

In [76]:
patlines??

[0;31mType:[0m        generator
[0;31mString form:[0m <generator object gen_grep.<locals>.<genexpr> at 0x7fc313e87bd0>
[0;31mDocstring:[0m   <no docstring>


In [59]:
files = gen_open(paths)

In [61]:
for file in files:
    print(f)

/Users/dengjiaying/Project/Py/statistics/data/req_history.csv
/Users/dengjiaying/Project/Py/statistics/data/req_history.csv
/Users/dengjiaying/Project/Py/statistics/data/req_history.csv
