## 前置检查

In [None]:
!python --version

Python 3.10.12


### 第一部份 数据结构

Python 的数据模型相当问题

In [None]:
import collections

Card = collections.namedtuple('Card', ['rank', 'suit'])

class FrenchDeck:
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        self._cards = [Card(rank, suit) for suit in self.suits
                                        for rank in self.ranks]

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        return self._cards[position]

In [None]:
deck = FrenchDeck()
len(deck)

52

In [None]:
deck[0]

Card(rank='2', suit='spades')

In [None]:
deck[-1]

Card(rank='A', suit='hearts')

> 随机抽取一张牌，直接使用 Python 提供的函数



In [None]:
from random import choice
choice(deck)

Card(rank='2', suit='spades')

In [None]:
choice(deck)

Card(rank='6', suit='hearts')

In [None]:
for card in deck:
    print(card)

Card(rank='2', suit='spades')
Card(rank='3', suit='spades')
Card(rank='4', suit='spades')
Card(rank='5', suit='spades')
Card(rank='6', suit='spades')
Card(rank='7', suit='spades')
Card(rank='8', suit='spades')
Card(rank='9', suit='spades')
Card(rank='10', suit='spades')
Card(rank='J', suit='spades')
Card(rank='Q', suit='spades')
Card(rank='K', suit='spades')
Card(rank='A', suit='spades')
Card(rank='2', suit='diamonds')
Card(rank='3', suit='diamonds')
Card(rank='4', suit='diamonds')
Card(rank='5', suit='diamonds')
Card(rank='6', suit='diamonds')
Card(rank='7', suit='diamonds')
Card(rank='8', suit='diamonds')
Card(rank='9', suit='diamonds')
Card(rank='10', suit='diamonds')
Card(rank='J', suit='diamonds')
Card(rank='Q', suit='diamonds')
Card(rank='K', suit='diamonds')
Card(rank='A', suit='diamonds')
Card(rank='2', suit='clubs')
Card(rank='3', suit='clubs')
Card(rank='4', suit='clubs')
Card(rank='5', suit='clubs')
Card(rank='6', suit='clubs')
Card(rank='7', suit='clubs')
Card(rank='8', sui

In [None]:
for card in reversed(deck): # doctest: +ELLIPSIS
    print(card)

Card(rank='A', suit='hearts')
Card(rank='K', suit='hearts')
Card(rank='Q', suit='hearts')
Card(rank='J', suit='hearts')
Card(rank='10', suit='hearts')
Card(rank='9', suit='hearts')
Card(rank='8', suit='hearts')
Card(rank='7', suit='hearts')
Card(rank='6', suit='hearts')
Card(rank='5', suit='hearts')
Card(rank='4', suit='hearts')
Card(rank='3', suit='hearts')
Card(rank='2', suit='hearts')
Card(rank='A', suit='clubs')
Card(rank='K', suit='clubs')
Card(rank='Q', suit='clubs')
Card(rank='J', suit='clubs')
Card(rank='10', suit='clubs')
Card(rank='9', suit='clubs')
Card(rank='8', suit='clubs')
Card(rank='7', suit='clubs')
Card(rank='6', suit='clubs')
Card(rank='5', suit='clubs')
Card(rank='4', suit='clubs')
Card(rank='3', suit='clubs')
Card(rank='2', suit='clubs')
Card(rank='A', suit='diamonds')
Card(rank='K', suit='diamonds')
Card(rank='Q', suit='diamonds')
Card(rank='J', suit='diamonds')
Card(rank='10', suit='diamonds')
Card(rank='9', suit='diamonds')
Card(rank='8', suit='diamonds')
Card(r

> 迭代是隐式的, 如果一个容器没有实现 __contains__ 方法, 那么 in 运算符就会做一次顺序扫描.



In [None]:
Card('Q', 'hearts') in deck

True

In [None]:
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)

def spades_high(card):
    rank_value = FrenchDeck.ranks.index(card.rank)
    return rank_value * len(suit_values) + suit_values[card.suit]

suit_values[card.suit] # print card

3

In [None]:
for card in sorted(deck, key=spades_high):  # doctest: +ELLIPSIS
    print(card)


Card(rank='2', suit='clubs')
Card(rank='2', suit='diamonds')
Card(rank='2', suit='hearts')
Card(rank='2', suit='spades')
Card(rank='3', suit='clubs')
Card(rank='3', suit='diamonds')
Card(rank='3', suit='hearts')
Card(rank='3', suit='spades')
Card(rank='4', suit='clubs')
Card(rank='4', suit='diamonds')
Card(rank='4', suit='hearts')
Card(rank='4', suit='spades')
Card(rank='5', suit='clubs')
Card(rank='5', suit='diamonds')
Card(rank='5', suit='hearts')
Card(rank='5', suit='spades')
Card(rank='6', suit='clubs')
Card(rank='6', suit='diamonds')
Card(rank='6', suit='hearts')
Card(rank='6', suit='spades')
Card(rank='7', suit='clubs')
Card(rank='7', suit='diamonds')
Card(rank='7', suit='hearts')
Card(rank='7', suit='spades')
Card(rank='8', suit='clubs')
Card(rank='8', suit='diamonds')
Card(rank='8', suit='hearts')
Card(rank='8', suit='spades')
Card(rank='9', suit='clubs')
Card(rank='9', suit='diamonds')
Card(rank='9', suit='hearts')
Card(rank='9', suit='spades')
Card(rank='10', suit='clubs')
Ca

In [None]:
# 一个简单的二维向量类

"""
vector2d.py: 一个简单的类，演示一些特殊方法

加法:
    >>> v1 = Vector(2, 4)
    >>> v2 = Vector(2, 1)
    >>> v1 + v2
    Vector(4, 5)

绝对值:
    >>> v = Vector(3, 4)
    >>> abs(v)
    5.0

标量积:
    >>> v * 3
    Vector(9, 12)
    >>> abs(v * 3)
    15.0

"""

import math

class Vector:

    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def __repr__(self):
        return f'Vector({self.x!r}, {self.y!r})'

    def __abs__(self):
        return math.hypot(self.x, self.y)

    def __bool__(self):
        return bool(abs(self))

    def __add__(self, other):
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        return Vector(self.x * scalar, self.y * scalar)



> 有一个叫 ABC 语言, Python 从中继承对序列的统一处理方式. 都可以进行 迭代, 切片, 排序, 拼接

> Python 标准库用 C语言实现了丰富的序列类型

> 容器序列: list, tuple, collections.deque

> 扁平序列: str, bytes, array.array

> 可变序列: list, bytearrya, array.array

> 不可变序列: tuple, str, bytes

In [None]:
from collections import abc
issubclass(tuple, abc.Sequence)

True

In [None]:
issubclass(list, abc.MutableSequence)

True

In [None]:
symbols = '$¢£¥€¤a'
codes = []
for symbol in symbols:
    codes.append(ord(symbol))
codes

[36, 162, 163, 165, 8364, 164, 97]

> 列表推导式

In [None]:
codes = [ord(symbol) for symbol in symbols]
codes

[36, 162, 163, 165, 8364, 164, 97]

In [None]:
x = 'ABC'
codes = [ord(x) for x in x]
x

'ABC'

In [None]:
codes

[65, 66, 67]

In [None]:
codes = [last := ord(c) for c in x]
last

67

> 列表推导式和 map/filter 组合构建

In [None]:
symbols = '$¢£¥€¤'
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
beyond_ascii

[162, 163, 165, 8364, 164]

In [None]:
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))
beyond_ascii

[162, 163, 165, 8364, 164]

> 笛卡儿积

In [None]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts = [(color, size) for color in colors for size in sizes]
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

> 使用生成器表达式

In [None]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
for tshirt in ('%s %s' % (c, s) for c in colors for s in sizes):
    print(tshirt)

black S
black M
black L
white S
white M
white L


> 元组的两个作用, 一个是不可变列表, 另一个是用作记录

In [None]:
a = (10, 'alpha', [1, 2])
b = (10, 'alpha', [1, 2])
a == b

True

In [None]:
b[-1].append(99)
a == b

False

> 序列和可迭代对象拆包

In [None]:
divmod(20, 8)

(2, 4)

In [None]:
t = (20, 8)
divmod(*t)

(2, 4)

In [None]:
quotient, remainder = divmod(*t)
quotient, remainder

(2, 4)

In [None]:
# * 获取余下的项
a, b, *rest = range(5)
a, b, rest

(0, 1, [2, 3, 4])

In [None]:
a, b, *rest = range(2)
a, b, rest

(0, 1, [])

In [None]:
*range(4), 4

(0, 1, 2, 3, 4)

In [None]:
[*range(4), 4]

[0, 1, 2, 3, 4]

In [None]:
{*range(4), 4, *(5, 6, 7)}

{0, 1, 2, 3, 4, 5, 6, 7}

In [None]:
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

def main():
    print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
    for name, _, _, (lat, lon) in metro_areas:
        if lon <= 0:
            print(f'{name:15} | {lat:9.4f} | {lon:9.4f}')

main()

                |  latitude | longitude
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358


In [None]:
# 切片 slice
l = list(range(10))
l, l[2:5], l[:5], l[5:]

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [2, 3, 4], [0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

In [None]:
l[2:5] = [20, 30]
l

[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [None]:
del l[5:7]
l

[0, 1, 20, 30, 5, 8, 9]

In [None]:
l = [1, 2, 3]
l * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [None]:
5 * 'abcd'

'abcdabcdabcdabcdabcd'

In [None]:
my_list = [[1, 2],[3, 4]] * 3
my_list

[[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]]

In [None]:
# 构建嵌套列表
board = [['_'] * 3 for i in range(3)]
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [None]:
board[1][2] = 'X'
board

[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]

In [None]:
l = [1, 2, 3]
id(l)

134401409846976

> 下面的例子表明 Python 排序算法是稳定的

In [None]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits)

['apple', 'banana', 'grape', 'raspberry']

In [None]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [None]:
sorted(fruits, key=len, reverse=False)

['grape', 'apple', 'banana', 'raspberry']

In [None]:
from array import array
from random import random

floats = array('d', (random() for i in range(10**7)))
floats[-1]

0.8984762205630115

In [None]:
10**2, 10*2

(100, 20)

In [None]:
import numpy as np
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [None]:
type(a)

numpy.ndarray

In [None]:
a.shape

(12,)

In [None]:
a.shape = 3, 4
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
a.transpose()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

> deque, queue, multiprocessing, asyncio, heapq

> Python 序列通常按可变性分类, 也可以换个角度, 分成扁平和容器.

In [1]:
d1 = { 'a': 1, 'b': 3 }
d2 = { 'a': 2, 'b': 4, 'c': 6 }
d1 | d2

{'a': 2, 'b': 4, 'c': 6}

In [2]:
""" 构建一个索引映射，列出词出现的位置 """
import collections
import re
import sys

WORD_RE = re.compile(r'\w+')

index = collections.defaultdict(list)
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1
            location = (line_no, column_no)
            index[word].append(location)

for word in sorted(index, key=str.upper):
    print(word, index[word])

FileNotFoundError: [Errno 2] No such file or directory: '-f'