In [2]:
# __getitem__ 与 __len__
import collections

# namedtuple可以构建只有少数属性，没有方法的简单类
Card = collections.namedtuple('Card', ['rank', 'suit'])

class FrenchDeck:
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds cluds hearts'.split()

    def __init__(self):
        self._cards = [Card(rank, suit) for rank in self.ranks
                                        for suit in self.suits]

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        return self._cards[position]

In [3]:
# 使用namedtuple可以快速得到一个Card类
beer_card = Card('7', 'diamods')
beer_card

Card(rank='7', suit='diamods')

In [4]:
# 查看FrenchDeck的长度
deck = FrenchDeck()
len(deck)

52

In [5]:
# 抽取FrenchDeck类里任意位置的牌
deck[-1]

Card(rank='A', suit='hearts')

In [7]:
# 随机抽取卡牌
from random import choice
choice(deck)

Card(rank='6', suit='spades')

In [8]:
choice(deck)

Card(rank='8', suit='diamonds')

In [9]:
choice(deck)

Card(rank='Q', suit='spades')

In [10]:
# 由于__getitem__方法把[]操作交给了self._cards列表，所以它支持自动切片操作
deck[:3]

[Card(rank='2', suit='spades'),
 Card(rank='2', suit='diamonds'),
 Card(rank='2', suit='cluds')]

In [11]:
deck[12::13]

[Card(rank='5', suit='spades'),
 Card(rank='8', suit='diamonds'),
 Card(rank='J', suit='cluds'),
 Card(rank='A', suit='hearts')]

In [12]:
# 另外仅仅实现了__getitem__方法，这个类就变成了可迭代的
for card in deck:
    print(card)

Card(rank='2', suit='spades')
Card(rank='2', suit='diamonds')
Card(rank='2', suit='cluds')
Card(rank='2', suit='hearts')
Card(rank='3', suit='spades')
Card(rank='3', suit='diamonds')
Card(rank='3', suit='cluds')
Card(rank='3', suit='hearts')
Card(rank='4', suit='spades')
Card(rank='4', suit='diamonds')
Card(rank='4', suit='cluds')
Card(rank='4', suit='hearts')
Card(rank='5', suit='spades')
Card(rank='5', suit='diamonds')
Card(rank='5', suit='cluds')
Card(rank='5', suit='hearts')
Card(rank='6', suit='spades')
Card(rank='6', suit='diamonds')
Card(rank='6', suit='cluds')
Card(rank='6', suit='hearts')
Card(rank='7', suit='spades')
Card(rank='7', suit='diamonds')
Card(rank='7', suit='cluds')
Card(rank='7', suit='hearts')
Card(rank='8', suit='spades')
Card(rank='8', suit='diamonds')
Card(rank='8', suit='cluds')
Card(rank='8', suit='hearts')
Card(rank='9', suit='spades')
Card(rank='9', suit='diamonds')
Card(rank='9', suit='cluds')
Card(rank='9', suit='hearts')
Card(rank='10', suit='spades')
C

In [13]:
# 还可以反向迭代
for card in reversed(deck):
    print(card)

Card(rank='A', suit='hearts')
Card(rank='A', suit='cluds')
Card(rank='A', suit='diamonds')
Card(rank='A', suit='spades')
Card(rank='K', suit='hearts')
Card(rank='K', suit='cluds')
Card(rank='K', suit='diamonds')
Card(rank='K', suit='spades')
Card(rank='Q', suit='hearts')
Card(rank='Q', suit='cluds')
Card(rank='Q', suit='diamonds')
Card(rank='Q', suit='spades')
Card(rank='J', suit='hearts')
Card(rank='J', suit='cluds')
Card(rank='J', suit='diamonds')
Card(rank='J', suit='spades')
Card(rank='10', suit='hearts')
Card(rank='10', suit='cluds')
Card(rank='10', suit='diamonds')
Card(rank='10', suit='spades')
Card(rank='9', suit='hearts')
Card(rank='9', suit='cluds')
Card(rank='9', suit='diamonds')
Card(rank='9', suit='spades')
Card(rank='8', suit='hearts')
Card(rank='8', suit='cluds')
Card(rank='8', suit='diamonds')
Card(rank='8', suit='spades')
Card(rank='7', suit='hearts')
Card(rank='7', suit='cluds')
Card(rank='7', suit='diamonds')
Card(rank='7', suit='spades')
Card(rank='6', suit='hearts'

In [15]:
# in运算符同样可以在这个类上使用
# 迭代通常是隐式的，如果一个类没有实现__contains__方法，那么in运算符会按顺序执行一次迭代搜索
Card('Q', 'hearts') in deck

True

In [16]:
Card('Q', 'heart') in deck

False

In [17]:
# 模拟数值类型 
from math import hypot

class Vector:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    # 字符串表示形式 __repr__
    # 一个对象如果没有__str__，而python又需要调用它时，解释器会用__repr__作为替代
    def __repr__(self):
        return "Vector(%r, %r)" % (self.x, self.y)

    def __abs__(self):
        return hypot(self.x, self.y)
    
    # 默认情况下，自定义的类的实例总被认为是真的
    # 除非这个类对__bool__或者__len__函数有自己的实现
    # bool(x)的背后调用x.__bool__()的结果；如果不存在__bool__方法，那么bool(x)会尝试调用x.__len__()
    # 若返回0，则bool返回False，否则返回True
    def __bool__(self):
        return bool(abs(self))

    def __add__(self, other):
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        return Vector(self.x * scalar, self.y * scalar)


In [18]:
v1 = Vector(2, 4)
v2 = Vector(1, 3)
v1 + v2

Vector(3, 7)

In [19]:
abs(v2)

3.1622776601683795

In [21]:
v1 * 3

Vector(6, 12)

In [1]:

# 序列构成的数组
# 容器序列：list、tuple、collections.deque - 存放的是它们所包含的任意类型的对象的引用
# 扁平序列：str、bytes、bytearray、memoryview、array.array - 存放的是值而不是引用，实质上是一段连续的内存
# 可变序列：list、collections.deque、bytearray、array.array、memoryview
# 不可变序列：str、tuple、bytes

In [2]:
# 列表推导式 - list comprehension
# 通常的原则：只用列表推导式创建新的列表，并尽量保持简短
symbols = "#$%^&"
code = []

for symbol in symbols:
    code.append(ord(symbol))

code

[35, 36, 37, 94, 38]

In [3]:
code = [ord(symbol) for symbol in symbols]

code

[35, 36, 37, 94, 38]

In [5]:
# list comprehension 与 filter/map
symbols = "#$%^*!"
ascii_list = [ord(x) for x in symbols if ord(x) > 37]

ascii_list

[94, 42]

In [7]:
ascii_list = list(filter(lambda x: x > 37, map(ord, symbols)))

ascii_list

[94, 42]

In [8]:
# 使用列表推导计算笛卡尔积
colors = ['white', 'black']
sizes = ['S', 'M', 'L']

tshirts = [(color, size) for color in colors for size in sizes]

tshirts

[('white', 'S'),
 ('white', 'M'),
 ('white', 'L'),
 ('black', 'S'),
 ('black', 'M'),
 ('black', 'L')]

In [9]:
# 生成器表达式：遵从迭代器协议，可以逐个地产出元素，而不是先建立一个完整的列表
# 再把这个列表传递到某个构造函数里，可以节省更多的内存
# 使用生成器表达式计算笛卡尔积
colors = ['white', 'black']
sizes = ['S', 'M', 'L']

for i in ('%s %s' % (color, size) for color in colors for size in sizes):
    print(i)

white S
white M
white L
black S
black M
black L


In [1]:
# tuple - 不仅仅是不可变的列表，还可以用于没有字段名的记录
# 可以通过元素及其位置来记录更多的东西
lax_coordinates = (33.9425, -118.405056)  # 经纬度
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014) # 元组拆包
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]

for passort in sorted(traveler_ids):
    # %格式运算符能被匹配到对应的元组元素上
    print('%s/%s' % (passort)) 

# 元组拆包
for country, _ in traveler_ids:
    print(country)

BRA/CE342567
ESP/XDA205856
USA/31195855
USA
BRA
ESP


In [2]:
# 还可以使用*运算符把一个可迭代对象拆开作为函数的参数
divmod(20, 8)

(2, 4)

In [3]:
t = (20, 8)
divmod(*t)

(2, 4)

In [5]:
# 在python中，函数用*args来获取不确定数量的参数算是一种经典写法
# 这一概念也被扩展到平行赋值中了
a, b, *rest  = range(5)
a, b, rest

(0, 1, [2, 3, 4])

In [6]:
# *运算符只能跟在一个变量后面，但这个变量可以出现在任何位置
a, *rest, b, c = range(5)
a, rest, b, c

(0, [1, 2], 3, 4)

In [7]:
# 具名元组
# 使用collections.namedtuple可以创建一个带字段名的元组和一个有名称的类
# namedtuple需要传入2个参数，一个是类名，一个是这个类的字段名称
# 后者可以是由数个字符串组成的可迭代对象，或者由空格隔开的字段名组成的字符串
from collections import namedtuple

City = namedtuple('City', 'name contry population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.68, 139.69))
tokyo

City(name='Tokyo', contry='JP', population=36.933, coordinates=(35.68, 139.69))

In [8]:
tokyo.name, tokyo.contry

('Tokyo', 'JP')

In [9]:
tokyo[1]

'JP'

In [10]:
# 具名元组专有属性
# _fields类属性 - 包含这个类所有字段名称的元组
tokyo._fields

('name', 'contry', 'population', 'coordinates')

In [11]:
# _asdict - 把具名元组以collections.OrderedDict的形式返回
tokyo._asdict()

{'name': 'Tokyo',
 'contry': 'JP',
 'population': 36.933,
 'coordinates': (35.68, 139.69)}

In [12]:
for key, value in tokyo._asdict().items():
    print(key, ': ', value)

name :  Tokyo
contry :  JP
population :  36.933
coordinates :  (35.68, 139.69)


In [13]:
# 序列的增量赋值
# += 调用__iadd__方法，*= 调用__imul__方法
# 两者在操作可变对象时，会直接在原来对象上进行改变 - 浅拷贝
# 操作不可变对象时，会生成新的不可变对象 - 深拷贝
l = [1, 2, 3]
id(l)


2710795763072

In [14]:
l *= 2
l

[1, 2, 3, 1, 2, 3]

In [15]:
id(l)

2710795763072

In [16]:
t = (1, 2, 3)
id(t)

2710795661376

In [17]:
t *= 2
t

(1, 2, 3, 1, 2, 3)

In [18]:
id(t)

2710782438656

In [19]:
# 一个关于+=的谜题
t = (1, 2, [20, 30])
t[2] += [40, 50]

TypeError: 'tuple' object does not support item assignment

In [20]:
# 尽管抛出了异常，但t[2]仍被改变了
# 因此：
# 1. 不要把可变对象放在元组中
# 2. 增量赋值不是一个原子操作
# 3. 多查看python字节码
t

(1, 2, [20, 30, 40, 50])

In [21]:
# list.sort() 与 sorted()
# list.sort是就地排序，不会产生一个新的列表
# sorted()会生成一个新的列表
# 两者均有2个可选的关键字参数 - reverse 及 key
fruits = ['apple', 'grape', 'banana', 'raspberry']
sorted(fruits) 

['apple', 'banana', 'grape', 'raspberry']

In [22]:
# 新建了一个按字母排序的列表，原列表没有发生变化
fruits

['apple', 'grape', 'banana', 'raspberry']

In [23]:
# 按字母降序
sorted(fruits, reverse=True)

['raspberry', 'grape', 'banana', 'apple']

In [25]:
# 按长度排序
sorted(fruits, key=len)

['apple', 'grape', 'banana', 'raspberry']

In [26]:
# fruits 自身被排序
fruits.sort()
fruits

['apple', 'banana', 'grape', 'raspberry']

In [29]:
# 在有序序列中用bisect查找某个元素的插入位置
import bisect
import sys

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'

def demo(bisect_fn):
    for needle in reversed(NEEDLES):
        position = bisect_fn(HAYSTACK, needle)
        offset = position * '  |'
        print(ROW_FMT.format(needle, position, offset))

if sys.argv[-1] == 'left':
    bisect_fn = bisect.bisect_left
else:
    bisect_fn = bisect.bisect_right

print('DEMO:', bisect_fn.__name__)
print('haystack ->', ' '.join('%2d' % n for n in HAYSTACK))
demo(bisect_fn)

DEMO: bisect_right
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |29
23 @ 11      |  |  |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  5      |  |  |  |  |8 
 5 @  3      |  |  |5 
 2 @  1      |2 
 1 @  1      |1 
 0 @  0    0 


In [31]:
# bisect可以用来建立一个用数字作为索引的查询表格
# 比如把分数和等级对应起来
def grade(score, breakpoint=[60, 70, 80, 90], grades='FDCBA'):
    i = bisect.bisect(breakpoint, score)
    return grades[i]

[grade(score) for score in [33, 50, 68, 72, 88, 99]]

['F', 'F', 'D', 'C', 'B', 'A']

In [33]:
# 用bisect.insort在已排序好的序列中插入新的元素
# insort(seq, item)把变量item插入带序列seq中，并能保持seq的升序顺序
import bisect
import random

SIZE = 7
random.seed(1729)

my_list = []
for i in range(SIZE):
    new_item = random.randrange(SIZE * 2)
    bisect.insort(my_list, new_item)
    print('%2d ->' % new_item, my_list)

10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]


In [34]:
# 数组 - 如果我们需要一个只包含数字的列表，那么array.array比list更高效
# 数组支持所有跟可变序列有关的操作，包括.pop、.insert和.extend
# 还提供从文件读取和存入文件更快的方法，如.frombytes 和 .tofile
# 创建一个有1000万个随机浮点数的数组
from array import array
from random import random

# 'd' - 双精度浮点数
floats = array('d', (random() for i in range(10**7)))
floats[-1]

0.5963321947530882

In [35]:
# 存入文件
fp = open('./floats.bin', 'wb')
floats.tofile(fp)
fp.close()

In [36]:
# 新建一个空的双精度浮点数组
floats2 = array('d')

fp = open('./floats.bin', 'rb')
# 从文件读取10**7个浮点数
floats2.fromfile(fp, 10**7)
fp.close()
floats2[-1]

0.5963321947530882

In [37]:
floats == floats2 

True

In [38]:
# 内存视图 - memoryview
# 可以让用户在不复制内容的情况下操作同一个数组的不同切片
# 通过改变数组中的一个字节来更新数组的某一元素的值

# 'h' - 短整型有符号
numbers = array('h', [-2, -1, 0, 1, 2])

# 创建内存视图
memv = memoryview(numbers)

len(memv)

5

In [39]:
memv[0]

-2

In [40]:
# 创建一个memv_oct，将memv的内容转化为'B' - 无符号字符
memv_oct = memv.cast('B')

memv_oct.tolist()

[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]

In [41]:
memv_oct[5] = 4
numbers

array('h', [-2, -1, 1024, 1, 2])

In [4]:
# 双向队列
# collections.deque是一个线程安全，可以快速从两端添加或者删除元素的数据类型
from collections import deque

# deque可以接受一个maxlen的可选参数，该参数定义了队列的长度，且一旦确定就不可以更改
dq = deque(range(10), maxlen=10)

dq

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
# rotate可以将队列最右边的对应元素数量旋转到左边
dq.rotate(3)

dq

deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [6]:
# 当rotate接受一个负数的参数时，它会将最左边的元素旋转到最右边
dq.rotate(-4)

dq

deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0])

In [7]:
# 当一个已满队列新增元素时，反向端的元素会被挤掉
dq.appendleft(-1)

dq

deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
dq.extend([11, 22, 33])

dq

deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33])

In [9]:
dq.extendleft([10, 20, 30, 40])

dq

deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8])

In [10]:
# 创建字典的不同方式
a = dict(one=1, two=2, three=3)
b = {'one': 1, 'two': 2, 'three': 3}
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))
d = dict([('two', 2), ('one', 1), ('three', 3)])
e = dict({'three': 3, 'one': 1, 'two': 2})

a == b == c == d == e

True

In [11]:
# 字典推导式
dial_codes = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japen')
]

country_codes = {country: code for code, country in dial_codes}

country_codes

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japen': 81}

In [13]:
# 更多参数
{code: country.upper() for country, code in country_codes.items() if code < 66}

{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}

In [None]:
# setdefault - 处理找不到的键
import sys
import re

word_re = re.compile(r'\w+')

index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in  word_re.finditer(line):
            word = match.group()
            c_no = match.start() + 1
            location = (line_no, c_no)
            index.setdefault(word, []).append(location)

for word in sorted(index, key=str.upper):
    print(word, index[word]) 

In [2]:
# 一个特殊的字典：在查询时把非字符串的键转为支付串
class StrDict0(dict):

    def __missing__(self, key):
        # 如果找不到的键本身就是字符串，则抛出异常
        if isinstance(key, str):
            raise KeyError(key)
        # 如果找不到的键不是字符串，那么将她转为字符串再查找    
        return self[str(key)]

    # get方法把查找工作用self[key]的形式委托给__getitem__
    # 这样在宣布失败之前，还能通过__missing__再次重试
    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    # 先按照传入键的原本的值来查找（我们的映射类型中可能含有非字 
    # 符串的键），如果没找到，再用 str() 方法把键转换成字符串再查找 一次。
    def __contains__(self, key):
        return key in self.keys() or str(key) in self.keys()

In [3]:
d = StrDict0([('2', 'two'), ('4', 'four')])
d['2']

'two'

In [4]:
d[4]

'four'

In [5]:
# UserDict - 纯python实现的的类
# 自定义映射类型是，以UserDict为基类，比使用Dict方便
from collections import UserDict

class StrKeyDict(UserDict):
    def __missing__(self, key):
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        return str(key) in self.data

    def __setitem__(self, key, item):
        self.data[str(key)] = item

In [6]:
# 不可变映射类型
# 用MappingProxyType来获取字典的只读实例
from types import MappingProxyType

d = {1: 'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [7]:
# d中的内容可以通过d_proxy看到
d_proxy[1]

'A'

In [8]:
# 但是不能通过d_proxy进行任何修改
d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [9]:
# d_proxy是动态的，对d的任何修改都会反馈到它上面
d[2] = 'B'
d_proxy

mappingproxy({1: 'A', 2: 'B'})

In [10]:
d_proxy[2]

'B'

In [11]:
# 集合 set 的本质是许多唯一对象的聚集
# 因此，集合可以用于去重
# 集合中的元素必须是可 hash 的，set 类型本身是不可 hash 的
l = ['spam', 'spam', 'spam', 'eggs', 'spam']
set(l)

{'eggs', 'spam'}

In [12]:
list(set(l))

['eggs', 'spam']