## 第一章 数据结构和算法

#### 将序列分解为单独的变量

In [5]:
p = (4, 5)
a, b = p

# Generalisation: a starred target absorbs any number of items from an iterable.
numbers = list(range(1, 100))

*first, last = numbers           # all leading items, then the final item
first, *middle, last = numbers   # head, interior items, tail
first, *last = numbers           # head, then every remaining item

# Example: pull only the name and the year out of a nested record,
# discarding everything else into the throwaway name `_`.
record = ('ACME', 50, 123.45, (10, 26, 2017))
name, *_, (*_, year) = record
name
year

2017

#### 保存最后N个元素

In [49]:
from collections import deque

def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` together with its preceding lines.

    Args:
        lines: Iterable of strings to scan (for example an open text file).
        pattern: Substring looked for in each line with the ``in`` operator.
        history: Maximum number of preceding lines kept as context.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        ``deque`` (bounded by ``history``) of the lines seen before ``line``,
        oldest first.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Yield a snapshot rather than the live buffer: the buffer keeps
            # changing as the scan continues, so callers that store results
            # (e.g. ``list(search(...))``) would otherwise see every context
            # alias the same, final deque.
            yield line, deque(previous_lines, maxlen=history)
        previous_lines.append(line)
        
separator = '-' * 35

with open('test.txt') as source:
    for matched, context in search(source, 'better', 5):
        # Context lines come first, each tagged with a leading '*'.
        # Lines read from the file keep their newline, hence end=''.
        for earlier in context:
            print('*', earlier, end='')
        print(matched, end='')
        print(separator)

* The Zen of Python, by Tim Peters
* 
Beautiful is better than ugly.
-----------------------------------
* The Zen of Python, by Tim Peters
* 
* Beautiful is better than ugly.
Explicit is better than implicit.
-----------------------------------
* The Zen of Python, by Tim Peters
* 
* Beautiful is better than ugly.
* Explicit is better than implicit.
Simple is better than complex.
-----------------------------------
* The Zen of Python, by Tim Peters
* 
* Beautiful is better than ugly.
* Explicit is better than implicit.
* Simple is better than complex.
Complex is better than complicated.
-----------------------------------
* 
* Beautiful is better than ugly.
* Explicit is better than implicit.
* Simple is better than complex.
* Complex is better than complicated.
Flat is better than nested.
-----------------------------------
* Beautiful is better than ugly.
* Explicit is better than implicit.
* Simple is better than complex.
* Complex is better than complicated.
* Flat is better than

#### 找到最大或最小的N个元素

In [69]:
import heapq

nums = [1, 5, 9, 2, 3, 6, 8, 7, 4, 11, 22, 33]

# Print the three largest values, then the three smallest.
for pick in (heapq.nlargest, heapq.nsmallest):
    print(pick(3, nums))

portfolio = [
    {'name': label, 'price': amount}
    for label, amount in (
        ('Jack1', 123.4),
        ('Jack2', 25.364),
        ('Jack3', 4552.2),
        ('Jack4', 4123.4),
        ('Jack5', 124),
    )
]

# The two cheapest entries, compared by their 'price' value.
cheap = heapq.nsmallest(2, portfolio, key=lambda stock: stock['price'])
print(cheap)

# Reorder nums in place as a heap: the smallest value moves to index 0.
heapq.heapify(nums)
nums
# Each heappop removes and returns the current smallest value.
heapq.heappop(nums)
2796 * 3

[33, 22, 11]
[1, 2, 3]
[{'price': 25.364, 'name': 'Jack2'}, {'price': 123.4, 'name': 'Jack1'}]


8388

* 当要找的元素数量相对较小时 用nlargest() nsmallest()
* 如果只是想简单的找到最大最小 用max() min()
* 如果N 和 集合本身的大小差不多的话 用sorted(items)[:N]

#### 找出序列中出现次数最多的元素

In [7]:
from collections import Counter

words = [
    'i', 'am', 'hello', 'hello', 'world',
    'zoo', 'zoo', 'zoo', 'you', 'are', 'are',
]

# Tally every word, then take the three most frequent (word, count) pairs.
word_counts = Counter()
word_counts.update(words)
top_three = word_counts.most_common(3)
top_three

[('zoo', 3), ('hello', 2), ('are', 2)]

#### 通过公共键对字典列表排序

In [15]:
from operator import itemgetter

rows = [
    {'name': 'Abc', 'id': 1002},
    {'name': 'Jack', 'id': 1001},
    {'name': 'Tom', 'id': 1005},
]

# One key function per field; each one looks up that field in a row dict.
by_name = itemgetter('name')
by_id = itemgetter('id')

# sorted() returns new lists, so `rows` itself keeps its original order.
rows_by_name = sorted(rows, key=by_name)
rows_by_uid = sorted(rows, key=by_id)

print(rows_by_name)
print(rows_by_uid)

[{'id': 1002, 'name': 'Abc'}, {'id': 1001, 'name': 'Jack'}, {'id': 1005, 'name': 'Tom'}]
[{'id': 1001, 'name': 'Jack'}, {'id': 1002, 'name': 'Abc'}, {'id': 1005, 'name': 'Tom'}]


* 还可以使用lambda表达式

In [20]:
# Same ordering by 'id', with a lambda as the key function.
rows_by_uid = list(rows)
rows_by_uid.sort(key=lambda row: row['id'])
rows_by_uid

[{'id': 1001, 'name': 'Jack'},
 {'id': 1002, 'name': 'Abc'},
 {'id': 1005, 'name': 'Tom'}]

* 同样适用于max() min()

In [23]:
# min() and max() accept the same kind of key function as sorted().
min(rows, key=lambda row: row['id'])
max(rows, key=itemgetter('name'))

{'id': 1005, 'name': 'Tom'}

#### 对原生不支持比较操作的对象排序

In [29]:
class User:
    """Minimal record holding a ``user_id``; defines no comparison methods."""

    def __init__(self, user_id):
        """Store ``user_id`` on the instance."""
        self.user_id = user_id

    def __repr__(self):
        """Return the ``User(<user_id>)`` display form."""
        template = "User({})"
        return template.format(self.user_id)

users = [User(uid) for uid in (12, 3, 25)]
users
# User defines no ordering of its own, so sort on the user_id attribute.
sorted(users, key=lambda user: user.user_id)

[User(3), User(12), User(25)]

* 除了lambda还可以用operator.attrgetter()

In [31]:
from operator import attrgetter

# Key function that reads the user_id attribute from each object.
by_user_id = attrgetter('user_id')
sorted(users, key=by_user_id)

[User(3), User(12), User(25)]

使用lambda还是attrgetter取决于个人喜好。通常attrgetter要快一些,而且attrgetter具有同时提取多个字段值的能力
* attrgetter也可以用于min()和max()

#### 根据字段将记录分组

In [54]:
from itertools import groupby
from operator import itemgetter

rows = [
    {'phone': 123456, 'date': '10/30/2017'},
    {'phone': 55, 'date': '10/25/2017'},
    {'phone': 156, 'date': '11/05/2017'},
    {'phone': 13456, 'date': '09/20/2017'},
    {'phone': 1236, 'date': '10/15/2017'},
]

by_date = itemgetter('date')

# groupby() only groups consecutive items with equal keys, so sort the
# list in place by the same key first.
rows.sort(key=by_date)

for day, records in groupby(rows, key=by_date):
    print(day)
    for record in records:
        print('    ', record)

09/20/2017
     {'date': '09/20/2017', 'phone': 13456}
10/15/2017
     {'date': '10/15/2017', 'phone': 1236}
10/25/2017
     {'date': '10/25/2017', 'phone': 55}
10/30/2017
     {'date': '10/30/2017', 'phone': 123456}
11/05/2017
     {'date': '11/05/2017', 'phone': 156}


#### 筛选序列中的元素

In [56]:
mylist = [1, 2, 3, -1, -2, -10]
# Keep only the strictly positive values, in their original order.
[value for value in mylist if value > 0]

[1, 2, 3]

#### 将名称映射到序列的元素中

In [59]:
from collections import namedtuple

# Tuple subclass whose two positions can also be read as .addr and .joined.
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber(addr='123@abc.net', joined='2017-10-30')
sub.joined

'2017-10-30'

* 如果需要涉及字典的大型数据结构,使用namedtuple会更加有效,但是需要注意的是namedtuple是不可变的
* 如果需要修改任何属性通过_replace()实现 该方法会创建一个全新的命名元组

In [62]:
# _replace() builds a new named tuple with the given field changed;
# rebind `sub` to that new tuple.
updated = sub._replace(addr='456@123.net')
sub = updated
sub

Subscriber(addr='456@123.net', joined='2017-10-30')

#### 将多个映射合并为单个映射

In [79]:
from collections import ChainMap

a = {'x': 1, 'y': 3}
b = {'y': 2, 'z': 5}

# Approach 1: ChainMap presents both dicts as one mapping without copying.
c = ChainMap(a, b)
print(c)
# Deleting through the chain removes 'y' from the first mapping (a) only...
del c['y']
print(c)
# ...so this lookup now falls through to b.
c['y']

# Approach 2: merge with update().
# `c` is just another name for `a`, so update() modifies the original dict.
c = a
c.update(b)
c

ChainMap({'x': 1, 'y': 3}, {'z': 5, 'y': 2})
ChainMap({'x': 1}, {'z': 5, 'y': 2})


{'x': 1, 'y': 2, 'z': 5}