# 1.1 将序列分解为单独的变量

In [13]:
p = (4, 5)
x, y = p
print(x)

4


In [15]:
data = ['ACME', 50, 91.1, (2012, 12, 21)]
name, shares, price, date = data
print(name)
print(date)

ACME
(2012, 12, 21)


In [5]:
name, shares, price, (year, mon, day) = data
print(name)
print(mon)
print(day)

ACME
12
21

In [16]:
s = 'Hello'
a, b, c, d, e = s
print(a)
print(b)
print(e)

H
e
o


In [19]:
data = ['ACME', 50, 91.1, (2012, 12, 21)]
_, shares, price, _ = data
print(shares)
print(price)

50
91.1


# 1.2 从任意长度的可迭代对象中分解元素

In [20]:
def drop_first_last(grades):
    """Return the mean of *grades* with the first and last entries dropped.

    Args:
        grades: An iterable of numbers containing at least three items.

    Returns:
        The arithmetic mean of every item except the first and the last.

    Raises:
        ValueError: If *grades* has fewer than three items, so nothing is
            left to average once the two end values are removed.
    """
    # Star-unpacking already raises ValueError when fewer than two items exist.
    _first, *middle, _last = grades
    if not middle:
        raise ValueError('need at least three grades to drop first and last')
    return sum(middle) / len(middle)

In [22]:
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name, email, *phone_numbers = record
print(name)
print(email)
print(phone_numbers)

Dave
dave@example.com
['773-555-1212', '847-555-1212']


In [23]:
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]

def do_foo(x, y):
    """Print the tag 'foo' followed by *x* and *y*, space separated."""
    fields = ('foo', x, y)
    print(*fields)
    
def do_bar(s):
    """Print the tag 'bar' followed by *s*, space separated."""
    fields = ('bar', s)
    print(*fields)
    
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)

foo 1 2
bar hello
foo 3 4


In [24]:
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print(name)
print(year)

ACME
2012


In [25]:
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
print(head)
print(tail)

1
[10, 7, 4, 5, 9]


# 1.3 保存最后N个元素

In [33]:
from collections import deque

def search(lines, pattern, history=5):
    """Yield every line containing *pattern* along with the lines before it.

    Args:
        lines: An iterable of strings (for example an open text file).
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines to keep for each match.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        deque holding up to *history* lines seen before ``line``.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Yield a snapshot rather than the live deque: the live one keeps
            # changing as the scan continues, which would corrupt results
            # that the caller stores (e.g. via ``list(search(...))``).
            yield line, deque(previous_lines, maxlen=history)
        previous_lines.append(line)
        
# Example use on a file
if __name__ == '__main__':
    # Separator printed after every match and its context.
    separator = '-' * 20
    with open('somefile.txt') as source:
        for hit, context in search(source, 'python', 5):
            # Lines read from the file keep their own newlines, so emit the
            # context lines and the matching line back to back.
            print(*context, hit, sep='', end='')
            print(separator)

In [34]:
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)
q.append(4)
print(q)
q.append(5)
print(q)

deque([1, 2, 3], maxlen=3)
deque([2, 3, 4], maxlen=3)
deque([3, 4, 5], maxlen=3)


In [35]:
q = deque()
q.append(1)
q.append(2)
q.append(3)
print(q)
q.appendleft(4)
print(q)
print(q.pop())
print(q)
# popleft() removes and returns the leftmost element.
print(q.popleft())

deque([1, 2, 3])
deque([4, 1, 2, 3])
3
deque([4, 1, 2])
4


# 1.4 找到最大和最小的N个元素

In [37]:
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))  # prints [42, 37, 23]
print(heapq.nsmallest(3, nums))  # prints [-4, 1, 2]

[42, 37, 23]
[-4, 1, 2]


In [40]:
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price':543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares':35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]

cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])

print(cheap)
print(expensive)

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]


In [41]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heap = list(nums)
heapq.heapify(heap)
print(heap)

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]


In [42]:
print(heapq.heappop(heap))
print(heapq.heappop(heap))
print(heapq.heappop(heap))

-4
1
2


# 1.5 实现优先级队列

In [46]:
import heapq
class PriorityQueue:
    """Heap-backed queue that pops the item with the highest priority first.

    Items pushed with equal priority come back in insertion order.
    """

    def __init__(self):
        # Heap of (-priority, insertion index, item) tuples.
        self._queue = []
        # Running counter used to order entries that share a priority.
        self._index = 0

    def push(self, item, priority):
        """Add *item* to the queue with the given *priority*."""
        # Negate the priority because heapq is a min-heap; the index keeps
        # tuple comparison from ever reaching the (possibly unorderable) item.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item

In [48]:
class Item:
    """Minimal named object that defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f'Item({self.name!r})'

q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
print(q.pop())
print(q.pop())
print(q.pop())
print(q.pop())

Item('bar')
Item('spam')
Item('foo')
Item('grok')


In [49]:
a = Item('foo')
b = Item('bar')
a < b

TypeError: '<' not supported between instances of 'Item' and 'Item'

In [51]:
a = (1, Item('foo'))
b = (5, Item('bar'))
print(a < b)
c = (1, Item('grok'))
a < c

True


TypeError: '<' not supported between instances of 'Item' and 'Item'

In [52]:
a = (1, 0, Item('foo'))
b = (5, 1, Item('bar'))
c = (1, 2, Item('grok'))
print(a < b)
print(a < c)

True
True


# 1.6 在字典中将键映射到多个值上

In [53]:
d = {
    'a': [1, 2, 3],
    'b': [4, 5]
}

e = {
    'a': {1, 2, 3},
    'b': {4, 5}
}

In [54]:
from collections import defaultdict

d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)

d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)

In [55]:
# Sample (key, value) pairs to group by key.
pairs = [('a', 1), ('a', 2), ('b', 4)]
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
print(dict(d))

{'a': [1, 2], 'b': [4]}

# 1.7 让字典保持有序

In [57]:
from collections import OrderedDict

d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

for key in d:
    print(key, d[key])

foo 1
bar 2
spam 3
grok 4


In [59]:
import json
json.dumps(d)

'{"foo": 1, "bar": 2, "spam": 3, "grok": 4}'

# 1.8 与字典有关的计算问题

In [61]:
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

In [64]:
min_price = min(zip(prices.values(), prices.keys()))
max_price = max(zip(prices.values(), prices.keys()))
print(min_price)
print(max_price)

(10.75, 'FB')
(612.78, 'AAPL')


In [66]:
prices_sorted = sorted(zip(prices.values(), prices.keys()))
print(prices_sorted)

[(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]


In [67]:
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names))
print(max(prices_and_names))

(10.75, 'FB')


ValueError: max() arg is an empty sequence

In [68]:
print(min(prices))
print(max(prices))

AAPL
IBM


In [69]:
print(min(prices.values()))
print(max(prices.values()))

10.75
612.78


In [73]:
print(min(prices, key=lambda k: prices[k]))
print(max(prices, key=lambda k: prices[k]))

min_value = prices[min(prices, key=lambda k: prices[k])]
print(min_value)

FB
AAPL
10.75


In [78]:
prices = {'AAA': 45.23, 'ZZZ': 45.23}
print("min =", min(zip(prices.values(), prices.keys())))
print("max =", max(zip(prices.values(), prices.keys())))

min = (45.23, 'AAA')
max = (45.23, 'ZZZ')


# 1.9 在两个字典中寻找相同点

In [79]:
a = {
    'x': 1,
    'y': 2,
    'z': 3
}

b = {
    'w': 10,
    'x': 11,
    'y': 2
}

In [84]:
# Find keys in common
print(a.keys() & b.keys())

# Find keys in a that are not in b
print(a.keys() - b.keys())

# Find (key, value) pairs in common
print(a.items() & b.items())

{'y', 'x'}
{'z'}
{('y', 2)}


In [86]:
# Make a new dictionary with certain keys removed
c = {key:a[key] for key in a.keys() - {'z', 'w'}}
c

{'y': 2, 'x': 1}

# 1.10 从序列中移除重复项且保持元素间顺序不变

In [87]:
def dedupe(items):
    """Yield the items of *items* in order, skipping any already yielded.

    Items are tracked in a set, so they must be hashable.
    """
    already_emitted = set()
    for candidate in items:
        if candidate in already_emitted:
            continue
        yield candidate
        already_emitted.add(candidate)

In [89]:
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a))

[1, 5, 2, 9, 10]

In [90]:
def dedupe(items, key=None):
    """Yield the items of *items* in order, skipping duplicates.

    Args:
        items: Iterable to filter.
        key: Optional callable mapping an item to the hashable value used to
            detect duplicates. When omitted, the item itself is used.
    """
    already_emitted = set()
    for candidate in items:
        if key is None:
            marker = candidate
        else:
            marker = key(candidate)
        if marker in already_emitted:
            continue
        yield candidate
        already_emitted.add(marker)

In [93]:
a = [{'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
print(list(dedupe(a, key=lambda d: (d['x'], d['y']))))
print(list(dedupe(a, key=lambda d: d['x'])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]
[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]


# 1.11 对切片命名

In [95]:
items = [0, 1, 2, 3, 4, 5, 6]
a = slice(2, 4)
print(items[2:4])
print(items[a])
items[a] = [10,11]
print(items)
del items[a]
print(items)

[2, 3]
[2, 3]
[0, 1, 10, 11, 4, 5, 6]
[0, 1, 4, 5, 6]


# 1.12 找出序列中出现次数最多的元素

In [105]:
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]

from collections import Counter
word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)

[('eyes', 8), ('the', 5), ('look', 4)]


In [106]:
print(word_counts['not'])
print(word_counts['eyes'])

1
8


In [107]:
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
    word_counts[word] += 1

# word_counts.update(morewords)
print(word_counts['eyes'])

9


In [108]:
a = Counter(words)
b = Counter(morewords)

# Combine counts
c = a + b
print(c)

# Subtract counts
d = a - b
print(d)

Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})


# 1.13 通过公共键对字典列表排序

In [109]:
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

In [110]:
from operator import itemgetter

rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))

print(rows_by_fname)
print(rows_by_uid)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]


In [111]:
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
print(rows_by_lfname)

[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [113]:
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'], r['fname']))
print(rows_by_fname)
print(rows_by_lfname)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [114]:
print(min(rows, key=itemgetter('uid')))
print(max(rows, key=itemgetter('uid')))

{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}


# 1.14 对不原生支持比较操作的对象排序

In [116]:
class User:
    """Simple record carrying a ``user_id``; defines no ordering of its own."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'

users = [User(23), User(3), User(99)]
print(users)
print(sorted(users, key=lambda u: u.user_id))

[User(23), User(3), User(99)]
[User(3), User(23), User(99)]


In [117]:
from operator import attrgetter
sorted(users, key=attrgetter('user_id'))

[User(3), User(23), User(99)]

In [119]:
# by_name = sorted(users, key=attrgetter('last_name', 'first_name'))

print(min(users, key=attrgetter('user_id')))
print(max(users, key=attrgetter('user_id')))

User(3)
User(99)


# 1.15 根据字段将记录分组

In [120]:
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

In [121]:
from operator import itemgetter
from itertools import groupby

# Sort by the desired field first
rows.sort(key=itemgetter('date'))

# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [122]:
from collections import defaultdict
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

for r in rows_by_date['07/01/2012']:
    print(r)

{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}


# 1.16 筛选序列中的元素

In [124]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print([n for n in mylist if n > 0])
print([n for n in mylist if n < 0])

[1, 4, 10, 2, 3]
[-5, -7, -1]


In [127]:
pos = (n for n in mylist if n > 0)
print(pos)
for x in pos:
    print(x)

<generator object <genexpr> at 0x000001B7BFF587B0>
1
4
10
2
3


In [129]:
values = ['1', '2', '-3', '-', '4', 'N/A', '5']

def is_int(val):
    """Return True if ``int(val)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ValueError (e.g. ``'N/A'``), TypeError
    (e.g. ``None``) or OverflowError (e.g. ``float('inf')``) are reported as
    False instead of propagating the exception, so the function is safe to
    use as a ``filter()`` predicate over mixed data.
    """
    try:
        int(val)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

ivals = list(filter(is_int, values))
print(ivals)

['1', '2', '-3', '4', '5']


In [130]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math
[math.sqrt(n) for n in mylist if n > 0]

[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]

In [132]:
clip_neg = [n if n > 0 else 0 for n in mylist]
print(clip_neg)
clip_pos = [n if n < 0 else 0 for n in mylist]
print(clip_pos)

[1, 4, 0, 10, 0, 2, 3, 0]
[0, 0, -5, 0, -7, 0, 0, -1]


In [133]:
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

counts = [0, 3, 10, 4, 1, 7, 6, 1]

from itertools import compress
more5 = [n > 5 for n in counts]
print(more5)
print(list(compress(addresses, more5)))

[False, False, True, False, False, True, True, False]
['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']


# 1.17 从字典中提取子集

In [136]:
prices = {
    'ACME': 45.23,
    'AAPL':612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

# Make a dictionary of all prices over 200
p1 = {key:value for key, value in prices.items() if value > 200}
print(p1)

# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key:value for key, value in prices.items() if key in tech_names}
print(p2)

{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


In [138]:
p1 = dict((key, value) for key, value in prices.items() if value > 200)

# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key:prices[key] for key in prices.keys() & tech_names}
print(p2)

{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


# 1.18 将名称映射到序列的元素中

In [139]:
from collections import namedtuple
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub)
print(sub.addr)
print(sub.joined)

Subscriber(addr='jonesy@example.com', joined='2012-10-19')
jonesy@example.com
2012-10-19


In [140]:
print(len(sub))
addr, joined = sub
print(addr)
print(joined)

2
jonesy@example.com
2012-10-19


In [141]:
def compute_cost(records):
    """Return the sum over *records* of element 1 multiplied by element 2.

    Each record must support indexing at positions 1 and 2. The running
    total starts at ``0.0``, which is returned for an empty input.
    """
    running = 0.0
    for entry in records:
        quantity, unit_price = entry[1], entry[2]
        running = running + quantity * unit_price
    return running

In [143]:
from collections import namedtuple

# Record type giving names to the three positional fields of a stock entry.
Stock = namedtuple('Stock', 'name shares price')

def compute_cost(records):
    """Return the sum of ``shares * price`` over *records*.

    Each record is unpacked into a ``Stock``, so it must supply exactly the
    three fields name, shares and price. The running total starts at ``0.0``.
    """
    running = 0.0
    for entry in records:
        stock = Stock(*entry)
        running = running + stock.shares * stock.price
    return running

In [144]:
s = Stock('ACME', 100, 123.45)
print(s)
print(s.shares)

Stock(name='ACME', shares=100, price=123.45)
100


In [145]:
s = s._replace(shares=75)
print(s)

Stock(name='ACME', shares=75, price=123.45)


In [147]:
from collections import namedtuple

# Stock record extended with optional date and time fields.
Stock = namedtuple('Stock', 'name shares price date time')

# Prototype instance holding the default value for every field.
stock_prototype = Stock(name='', shares=0, price=0.0, date=None, time=None)

def dict_to_stock(s):
    """Build a ``Stock`` from the dictionary *s*.

    Fields missing from *s* keep the values of ``stock_prototype``; the keys
    of *s* are passed to ``_replace`` as keyword arguments.
    """
    overrides = dict(s)
    return stock_prototype._replace(**overrides)

In [148]:
a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print(dict_to_stock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
print(dict_to_stock(b))

Stock(name='ACME', shares=100, price=123.45, date=None, time=None)
Stock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)


# 1.19 同时对数据做转换和换算

In [151]:
nums = [1, 2, 3, 4, 5]
s = sum(x * x for x in nums)
print(nums)
print(s)

[1, 2, 3, 4, 5]
55


In [154]:
# Determine if any .py files exist in a directory
import os
files = os.listdir('D:\study')
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print('Sorry, no python.')
    
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(','.join(str(x)) for x in s)

# Data reduction across fields of a data structures
portfolio = [
    {'name':'GOOG', 'shares':50},
    {'name':'YHOO', 'shares':75},
    {'name':'AOL', 'shares':20},
    {'name':'SCOX', 'shares':65}
]
min_shares = min(s['shares'] for s in portfolio)

There be python!
<generator object <genexpr> at 0x000001B7BFF5A8F0>


In [155]:
s = sum((x * x for x in nums))  # Pass generator-expr as argument
print(s)
s = sum(x * x for x in nums)  # More elegant syntax
print(s)

55
55


In [157]:
# Original
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)

# Alternative
min_shares = min(portfolio, key=lambda s:s['shares'])
print(min_shares)

20
{'name': 'AOL', 'shares': 20}


# 1.20 将多个映射合并为单个映射

In [158]:
a = {'x':1, 'z':3}
b = {'y':2, 'z':4}

In [159]:
from collections import ChainMap
c = ChainMap(a, b)
print(c['x'])
print(c['y'])
print(c['z'])

1
2
3


In [160]:
print(len(c))
print(list(c.keys()))
print(list(c.values()))

3
['y', 'z', 'x']
[2, 3, 1]


In [161]:
values = ChainMap()
values['x'] = 1

# Add a new mapping
values = values.new_child()
values['x'] = 2

# Add a new mapping
values = values.new_child()
values['x'] = 3

print(values)
print(values['x'])

# Discard last mapping
values = values.parents
print(values['x'])

# Discard last mapping
values = values.parents
print(values['x'])

print(values)

ChainMap({'x': 3}, {'x': 2}, {'x': 1})
3
2
1
ChainMap({'x': 1})


In [162]:
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
merged = dict(b)
merged.update(a)
print(merged['x'])
print(merged['y'])
print(merged['z'])

1
2
3
