### 解压可迭代对象

In [None]:
# Nested star-unpacking: keep the first field of the record and the last
# element of its trailing date tuple; `_` soaks up everything in between.
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print(name)  # ACME
print(year)  # 2012

### 保留最后 N 个元素

In [None]:
from collections import deque

# A deque created with maxlen=3 holds at most three items; once it is full,
# each append discards the item at the opposite end.
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
# q now holds 1, 2, 3 and is full.

### 最大或最小的 N 个元素

In [None]:
import heapq
# nlargest / nsmallest return the N extreme values as a sorted list without
# modifying `nums`.
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]

In [None]:
# Heapify a copy so that `nums` itself keeps its original order.
heap = list(nums)
heapq.heapify(heap)
# heappop removes and returns the smallest remaining item.
heapq.heappop(heap)

### 优先级队列

如果你想在多个线程中使用同一个队列，那么你需要增加适当的锁和信号量机制。

In [None]:
import heapq

class PriorityQueue:
    """Priority queue built on ``heapq``; the highest priority pops first.

    Items pushed with equal priority are returned in insertion order.
    """

    def __init__(self):
        # Heap of (-priority, insertion index, item) entries.
        self._queue = []
        # Number of pushes so far; serves as the tie-breaker.
        self._index = 0

    def push(self, item, priority):
        """Add *item* to the queue with the given numeric *priority*."""
        # heapq is a min-heap, so the priority is negated to make the largest
        # priority surface first. The index orders entries of equal priority
        # by arrival.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority.

        Raises IndexError when the queue is empty.
        """
        *_, item = heapq.heappop(self._queue)
        return item

In [None]:
class Item:
    """Minimal named object used as a payload in the queue example."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f'Item({self.name!r})'

# Higher priority pops first. 'foo' and 'grok' share priority 1, so they
# come back in the order they were pushed.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
print(q.pop())  # Item('bar')
print(q.pop())  # Item('spam')
print(q.pop())  # Item('foo')
print(q.pop())  # Item('grok')

### defaultdict

from collections import defaultdict

# A missing key is created on first access with an empty list, so append()
# works without first checking whether the key exists.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
# d now maps 'a' -> [1, 2] and 'b' -> [4].

### OrderedDict

In [None]:
from collections import OrderedDict

# An OrderedDict iterates its keys in the order they were first inserted:
# foo, bar, spam, grok.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

### 二次排序

In [None]:
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# zip() puts the value first in each pair, so the sort orders by price and
# only falls back to the ticker name when two prices are equal.
sorted(zip(prices.values(), prices.keys()))

### 字典运算

In [None]:
a = {
    'x' : 1,
    'y' : 2,
    'z' : 3
}

b = {
    'w' : 10,
    'x' : 11,
    'y' : 2
}

# Key and item views support set operations directly.
print(a.keys() & b.keys())  # keys present in both: 'x' and 'y'
print(a.keys() - b.keys())  # keys only in a: 'z'
print(a.items() & b.items())  # (key, value) pairs present in both: ('y', 2)

### 消除重复且不改变顺序

In [None]:
def dedupe(items, key=None):
    """Yield the elements of *items* in their original order, skipping repeats.

    Args:
        items: Any iterable.
        key: Optional one-argument function that maps an element to a
            hashable value used for the duplicate check. When omitted, the
            elements themselves are used and therefore must be hashable.
            Supplying a key makes unhashable elements (e.g. dicts) usable.

    Yields:
        The first occurrence of each distinct element (as judged by *key*).
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)

In [None]:
a = [1, 5, 2, 1, 9, 1, 5, 10]
# dedupe() is a generator; list() collects the first occurrences in their
# original order: [1, 5, 2, 9, 10]
list(dedupe(a))

### Counter

In [None]:
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]

# Counter tallies how many times each word occurs.
word_counts = Counter(words)
# update() adds to the existing tallies instead of replacing them.
# NOTE(review): it is fed the same `words` list again, so every count ends
# up doubled -- confirm this is intended rather than a second word list.
word_counts.update(words)
word_counts

### 排序，根据词典key

In [None]:
from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

# Sort by one field, then by the compound key (lname, fname).
# sorted() returns a new list each time; `rows` itself is not reordered.
sorted(rows, key=itemgetter('fname'))
sorted(rows, key=itemgetter('lname','fname'))

# Equivalent lambda form; slightly slower
sorted(rows, key=lambda r: r['fname'])

### 排序，根据实例attr

In [None]:
from operator import attrgetter


class User:
    """Minimal record type that carries only a ``user_id``."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'

users = [User(23), User(3), User(99)]
# attrgetter reads the sort key straight from each instance's attribute.
sorted(users, key=attrgetter('user_id'))
# Several attribute names can be combined into a compound key (not runnable
# here: User has no such attributes):
# sorted(users, key=attrgetter('last_name', 'first_name'))

# Equivalent lambda form; slightly slower
sorted(users, key=lambda u: u.user_id)

### 词典分组

首先排序，然后 groupby

In [None]:
from operator import itemgetter
from itertools import groupby


rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]


# Sort by the desired field first: groupby() only merges *consecutive* rows
# that share a key, so unsorted input would split one date into several groups.
# Note that this sorts `rows` in place.
rows.sort(key=itemgetter('date'))
# Iterate in groups; `items` is a one-shot iterator over the rows of that date
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

### 列表筛选

In [None]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]

# List comprehension: keep the positive values
[n for n in mylist if n > 0]

# filter: the same selection with a named predicate
def larger_zero(val):
    """Return True when *val* is strictly greater than zero."""
    return val > 0

list(filter(larger_zero, mylist))

# compress: select by a parallel sequence of booleans
from itertools import compress

# One flag per element of mylist; despite its name, `more5` marks the values
# that are greater than 0. compress() keeps the elements whose flag is True.
more5 = [n > 0 for n in mylist]
list(compress(mylist, more5))

### namedtuple

命名元组是不可更改的，不能直接给属性赋值。如果需要修改某个属性，可以用 `_replace()` 方法创建一个替换了对应字段的新实例，例如：

`s = s._replace(shares=75)`

> 如果需要频繁更新很多实例属性，命名元组并不合适；这时更高效的数据结构是定义了 `__slots__` 的类

In [None]:
from collections import namedtuple

# namedtuple() builds a tuple subclass whose fields can also be read by name
# (sub.addr, sub.joined).
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub)

# Supports the usual tuple operations
print(len(sub))  # 2
addr, joined = sub

### 合并词典

In [None]:
from collections import ChainMap

a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }

# Lookups search `a` first and then `b`, so c['z'] is 3.
# Updates and deletions always affect the first mapping in the chain (`a`).
c = ChainMap(a,b)

### 排列组合

In [None]:
from itertools import permutations, combinations, combinations_with_replacement

items = ['a', 'b', 'c']
# Order matters: ('a', 'b') and ('b', 'a') are both produced
for p in permutations(items, 2):
    print(p)

In [None]:
# Order does not matter: each pair of distinct positions appears once
for c in combinations(items, 2):
    print(c)

In [None]:
# With replacement: an element may be picked more than once, e.g. ('a', 'a')
for c in combinations_with_replacement(items, 2):
    print(c)

### zip

In [None]:
# zip() stops at the shorter input: xpts has 7 values and ypts has 6, so the
# last x value (8) is never printed.
xpts = [1, 5, 4, 2, 10, 7, 8]
ypts = [101, 78, 37, 15, 62, 99]
for x, y in zip(xpts, ypts):
    print(x, y)

In [None]:
from itertools import zip_longest

# zip_longest() runs to the end of the longer input and pads the shorter one
# with fillvalue, so the final pair printed is (8, 0).
for x, y in zip_longest(xpts, ypts, fillvalue=0):
    print(x, y)

### chain

In [None]:
from itertools import chain
# chain() walks `a` and then `b` as one sequence without building a combined
# list; the inputs may hold different types.
a = [1, 2, 3, 4]
b = ['x', 'y', 'z']
for x in chain(a, b):
    print(x)

### heapq.merge

In [None]:
import heapq
# heapq.merge() lazily interleaves inputs that are each already sorted and
# yields one sorted stream: 1, 2, 4, 5, 6, 7, 10, 11.
a = [1, 4, 7, 10]
b = [2, 5, 6, 11]
for c in heapq.merge(a, b):
    print(c)

### 固定大小记录的文件迭代

In [None]:
# from functools import partial

# RECORD_SIZE = 32

# with open('somefile.data', 'rb') as f:
#     records = iter(partial(f.read, RECORD_SIZE), b'')
#     for r in records:
#         ...

### 文件存在

In [None]:
import os

# Existence and file-type tests for a path
os.path.exists('/etc/passwd')
os.path.isfile('/etc/passwd')
os.path.isdir('/etc/passwd')
# Symbolic-link test, and the path with any symbolic links resolved
os.path.islink('/usr/local/bin/python3')
os.path.realpath('/usr/local/bin/python3')
# Names of the entries inside a directory
os.listdir('/etc')

### 文件路径名的操作

In [None]:
import os
path = '/Users/beazley/Data/data.csv'

# Get the last component of the path: 'data.csv'
os.path.basename(path)

# Get the directory name: '/Users/beazley/Data'
os.path.dirname(path)

# Join path components together using the platform's separator
os.path.join('tmp', 'data', os.path.basename(path))

# Expand the user's home directory (the leading '~')
path = '~/Data/data.csv'
os.path.expanduser(path)

# Split the file extension; `path` was rebound above, so this operates on
# '~/Data/data.csv' and gives ('~/Data/data', '.csv')
os.path.splitext(path)

### 文件名编码

surrogateescape:
这是 Python 在绝大部分面向 OS 的 API 中所使用的错误处理器，
它能以一种优雅的方式处理由操作系统提供的数据的编码问题。
在解码出错时会将出错字节存储到一个很少被使用到的Unicode编码范围内。
在编码时将那些隐藏值又还原回原先解码失败的字节序列。
它不仅对于OS API非常有用，也能很容易的处理其他情况下的编码错误。

In [62]:
import sys

def bad_filename(filename):
    """Return a printable form of a file name that holds undecodable bytes.

    The name is encoded with the file system encoding using the
    ``surrogateescape`` handler, which turns escaped surrogate code points
    back into their original raw bytes. The bytes are then decoded as
    Latin-1, which maps every byte value to a character.
    """
    fs_encoding = sys.getfilesystemencoding()
    raw = filename.encode(fs_encoding, errors='surrogateescape')
    return str(raw, 'latin-1')

'utf-8'

### 将字节写入文本文件

In [None]:
import sys
# sys.stdout is a text stream; its .buffer attribute is the underlying binary
# stream, which accepts bytes directly.
sys.stdout.buffer.write(b'Hello\n')

### 创建临时文件和文件夹

In [65]:
from tempfile import TemporaryFile

# Mode 'w+t' opens the temporary file as text for both writing and reading.
with TemporaryFile('w+t') as f:
    # Read/write to the file
    f.write('Hello World\n')
    f.write('Testing\n')

    # Seek back to beginning and read the data
    f.seek(0)
    data = f.read()

# Temporary file is destroyed once the with-block exits; `data` keeps the text

In [64]:
from tempfile import NamedTemporaryFile

# NamedTemporaryFile has a visible name in the file system, exposed as f.name.
with NamedTemporaryFile('w+t') as f:
    print('filename is:', f.name)
    ...

# File automatically destroyed

filename is: /var/folders/_2/bcj87l1d4r92fwq81k7wptd80000gn/T/tmpatp2bgpq


In [66]:
from tempfile import TemporaryDirectory

# The context manager yields the path of the newly created directory.
with TemporaryDirectory() as dirname:
    print('dirname is:', dirname)
    # Use the directory
    ...
# Directory and all contents destroyed

dirname is: /var/folders/_2/bcj87l1d4r92fwq81k7wptd80000gn/T/tmpgthpqokf


### 序列化Python对象

In [None]:
# import pickle

# f = open('somefile', 'wb')
# pickle.dump(data, f)

# # Restore from a file
# f = open('somefile', 'rb')
# data = pickle.load(f)

### Struct

https://python3-cookbook.readthedocs.io/zh_CN/latest/c06/p11_read_write_binary_arrays_of_structures.html

https://python3-cookbook.readthedocs.io/zh_CN/latest/c06/p12_read_nested_and_variable_sized_binary_structures.html


### 命名冲突

定义的一个变量和某个保留关键字冲突，这时候可以使用单下划线作为后缀

In [None]:
lambda_ = 2.0 # Trailing _ to avoid clash with lambda keyword