# Sequence Hacking, Hashing, and Slicing

## Vector: A User-Defined Sequence Type

## Vector Take #1

In [2]:
from array import array
import reprlib
import math

In [3]:
class Vector:
    """A multidimensional vector backed by an ``array`` of numbers (take #1).

    The instance can be iterated, compared, converted to ``bytes`` and
    rebuilt from those bytes with ``Vector.frombytes``.
    """

    # Type code passed to ``array`` and written as the first byte by __bytes__.
    typecode = 'd'

    def __init__(self, components):
        """Store *components* (an iterable of numbers) in an array."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the stored components."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib abbreviates long arrays."""
        components = reprlib.repr(self._components)
        # Keep only the bracketed part: drop everything before '[' and the
        # final ')' of the array repr.
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __bytes__(self):
        """Return the typecode byte followed by the raw array bytes."""
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __eq__(self, other):
        """Compare component-wise by materializing both operands as tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm of the vector."""
        return math.sqrt(sum(x*x for x in self))

    def __bool__(self):
        """A vector is falsy only when its norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Build an instance from bytes in the layout produced by __bytes__."""
        # The first byte names the typecode; the rest is the array payload.
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

## Protocol and Duck Typing

在面向对象编程中，协议是非正式的接口，只在文档中定义，在代码中不定义。例如，Python的序列协议只需要`__len__`和`__getitem__`两个方法。任何类，只要使用标准的签名和语义实现了这两个方法，就能用在任何期待序列的地方。

In [5]:
import collections


# A playing card: a rank ('2'..'10', 'J', 'Q', 'K', 'A') and a suit name.
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A deck of cards that behaves like a sequence.

    Only ``__len__`` and ``__getitem__`` are implemented; indexing and
    slicing are delegated to the internal list of cards.
    """

    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        """Build one card per (suit, rank) pair, grouped by suit."""
        self._cards = [Card(rank, suit)
                       for suit in self.suits
                       for rank in self.ranks]

    def __len__(self):
        """Return the number of cards in the deck."""
        return len(self._cards)

    def __getitem__(self, position):
        """Return the card (or list of cards, for a slice) at *position*."""
        return self._cards[position]

## Vector Take #2: A Sliceable Sequence

In [6]:
class Vector:
    """A multidimensional vector backed by an ``array`` of numbers (take #2).

    Adds ``__len__`` and ``__getitem__`` so the vector supports the sequence
    protocol. Indexing is delegated to the underlying array, so a slice
    returns an ``array`` rather than a ``Vector``.
    """

    # Type code passed to ``array`` and written as the first byte by __bytes__.
    typecode = 'd'

    def __init__(self, components):
        """Store *components* (an iterable of numbers) in an array."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the stored components."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib abbreviates long arrays."""
        components = reprlib.repr(self._components)
        # Keep only the bracketed part: drop everything before '[' and the
        # final ')' of the array repr.
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __bytes__(self):
        """Return the typecode byte followed by the raw array bytes."""
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __eq__(self, other):
        """Compare component-wise by materializing both operands as tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm of the vector."""
        return math.sqrt(sum(x*x for x in self))

    def __bool__(self):
        """A vector is falsy only when its norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Build an instance from bytes in the layout produced by __bytes__."""
        # The first byte names the typecode; the rest is the array payload.
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return whatever the underlying array returns for *index*."""
        return self._components[index]

In [7]:
v1 = Vector([3, 4, 5])

In [8]:
len(v1)

3

In [9]:
v1[0], v1[-1]

(3.0, 5.0)

### How Slicing Works

In [10]:
class MySeq:
    """Probe class that shows what the interpreter passes to ``__getitem__``."""

    def __getitem__(self, index):
        """Return *index* untouched so the caller can inspect it."""
        received = index
        return received

In [11]:
s = MySeq()
s[1]

1

In [14]:
s[1:4]

slice(1, 4, None)

In [17]:
s[1:4:2]

slice(1, 4, 2)

In [18]:
s[1:4:2, 7:9]

(slice(1, 4, 2), slice(7, 9, None))

In [22]:
#help(slice)

Help on class slice in module builtins:

class slice(object)
 |  slice(stop)
 |  slice(start, stop[, step])
 |  
 |  Create a slice object.  This is used for extended slicing (e.g. a[0:10:2]).
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __ne__(self, value, /)
 |      Return self!=value.
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __repr__(self, /)
 |      Return repr(self).
 |  
 |  indices(...)
 |      S.indices(len) -> (start, stop, stride)
 |      
 |      Assuming a sequence of length len, calculate the start and stop
 |      indices, and the stride length of the extended slice des

1. slice是内置类型
2. slice有start, step, stop属性，以及indices方法

In [24]:
help(slice.indices)

Help on method_descriptor:

indices(...)
    S.indices(len) -> (start, stop, stride)
    
    Assuming a sequence of length len, calculate the start and stop
    indices, and the stride length of the extended slice described by
    S. Out of bounds indices are clipped in a manner consistent with the
    handling of normal slices.



In [26]:
slice(None, 10, 2).indices(5)

(0, 5, 2)

In [27]:
slice(-3, None, None).indices(5)

(2, 5, 1)

### A Slice-Aware `__getitem__`

让Vector表现为序列所需要的两个方法： `__len__`和`__getitem__`。

In [29]:
import numbers

```python

class Vector:
    """Sequence-protocol methods for Vector with a slice-aware __getitem__.

    Only the two methods under discussion are shown; they read
    ``self._components``, which is set up elsewhere in the full class.
    """

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component, or a new instance for a slice.

        Raises:
            TypeError: if *index* is neither a slice nor an integer.
        """
        cls = type(self)
        if isinstance(index, slice):
            # Wrap the sliced array in the same class instead of leaking
            # the underlying array to the caller.
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))


# Backward-compatible alias for the original (misspelled) class name.
Vecotr = Vector
            
```

## Vector Take #3: Dynamic Attribute Access

## Vector Take #4: Hashing and a faster ==

In [36]:
import functools
import operator

In [38]:
class Vector:
    """A multidimensional vector backed by an ``array`` of numbers (take #4).

    Adds ``__hash__`` so that instances can be hashed; the hash is the XOR
    of the hashes of all components.
    """

    # Type code passed to ``array`` and written as the first byte by __bytes__.
    typecode = 'd'

    def __init__(self, components):
        """Store *components* (an iterable of numbers) in an array."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the stored components."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib abbreviates long arrays."""
        components = reprlib.repr(self._components)
        # Keep only the bracketed part: drop everything before '[' and the
        # final ')' of the array repr.
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __bytes__(self):
        """Return the typecode byte followed by the raw array bytes."""
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __abs__(self):
        """Return the Euclidean norm of the vector."""
        return math.sqrt(sum(x*x for x in self))

    def __bool__(self):
        """A vector is falsy only when its norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Build an instance from bytes in the layout produced by __bytes__."""
        # The first byte names the typecode; the rest is the array payload.
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return whatever the underlying array returns for *index*."""
        return self._components[index]

    def __eq__(self, other):
        """Compare component-wise by materializing both operands as tuples."""
        return tuple(self) == tuple(other)

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 when empty)."""
        # Generator expression: component hashes are computed lazily.
        hashes = (hash(x) for x in self._components)
        # 0 is the identity value for XOR, so it is a safe initializer.
        return functools.reduce(operator.xor, hashes, 0)

> 使用reduce函数时最好提供第三个参数，reduce(function, iterable, initializer)，这样就能避免这个异常：TypeError: reduce() of empty sequence with no initial value。 如果序列为空，initializer是返回的结果；否则，在归约中使用它作为第一个参数，因此应该使用恒等值。比如，对于+、|和^来说，initializer应该是0，对于*和&来说，应该是1.

```python
def __hash__(self):
    """Return the XOR of the hashes of all components (``map`` variant).

    Returns 0 when there are no components.
    """
    hashes = map(hash, self._components)
    # Pass 0 (the identity value for XOR) as the initializer so that an
    # empty sequence does not make reduce() raise TypeError.
    return functools.reduce(operator.xor, hashes, 0)
```

> Python 2中使用map函数效率低些，因为map函数要使用结果构建一个列表。但是在Python3中，map函数是惰性的，它会创建一个生成器，按需产出结果，因此能节省内存。

In [39]:
class Foo:
    """Demonstrates an element-wise ``__eq__`` built on ``len`` and ``zip``."""

    def __eq__(self, other):
        """Return True when both operands have equal length and no pair differs."""
        same_length = len(self) == len(other)
        # The pairwise scan only runs when the lengths match, and it stops
        # at the first differing pair.
        return same_length and not any(a != b for a, b in zip(self, other))


zip函数的名字来自拉链系结物（zipper fastener），因为这个物品用于把两个拉链的链牙咬合在一起。

In [40]:
zip(range(3), 'ABC')

<zip at 0x1145a9910>

> zip函数返回一个生成器，按需生成元组

In [41]:
list(zip(range(3), 'ABC'))

[(0, 'A'), (1, 'B'), (2, 'C')]

> 为了输出，构建一个列表；通常，我们会迭代生成器

In [42]:
list(zip(range(3), 'ABC', [0.0, 1.1, 2.2, 3.3]))

[(0, 'A', 0.0), (1, 'B', 1.1), (2, 'C', 2.2)]

> 当有一个可迭代对象耗尽后，它不会发出警告就停止。

In [43]:
import itertools

In [45]:
list(itertools.zip_longest(range(3), 'ABC', [0.0, 1.1, 2.2, 3.3], fillvalue=-1))

[(0, 'A', 0.0), (1, 'B', 1.1), (2, 'C', 2.2), (-1, -1, 3.3)]

> itertools.zip_longest函数行为有所不同：使用可选的 fillvalue(默认填充为None)填充缺失值，因此可以继续产出，直到最长的可迭代对象耗尽。

## 本章小结

In [49]:
from array import array
import reprlib
import math
import numbers
import functools
import operator
import itertools

In [53]:
class Vector:
    """A multidimensional vector backed by an ``array`` of numbers.

    Supports iteration, ``len``, integer indexing, slicing (returning a new
    instance), equality, hashing, ``abs``, truth testing, conversion to and
    from ``bytes``, read-only ``x``/``y``/``z``/``t`` shortcuts for the first
    four components, and custom formatting through ``format()``.
    """

    # Type code passed to ``array`` and written as the first byte by __bytes__.
    typecode = 'd'

    def __init__(self, components):
        """Store *components* (an iterable of numbers) in an array."""
        self._components = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the stored components."""
        return iter(self._components)

    def __repr__(self):
        """Return ``Vector([...])``; reprlib abbreviates long arrays."""
        components = reprlib.repr(self._components)
        # Keep only the bracketed part: drop everything before '[' and the
        # final ')' of the array repr.
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components rendered as a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode byte followed by the raw array bytes."""
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __eq__(self, other):
        """Return True when lengths match and every pair of items is equal.

        The length check runs first; ``zip`` alone would silently stop at
        the shorter operand. ``all`` stops at the first unequal pair.
        """
        return (len(self) == len(other) and
                all(a == b for a, b in zip(self, other)))

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 when empty)."""
        hashes = (hash(x) for x in self)
        # 0 is the identity value for XOR, so it is a safe initializer.
        return functools.reduce(operator.xor, hashes, 0)

    def __abs__(self):
        """Return the Euclidean norm of the vector."""
        return math.sqrt(sum(x*x for x in self))

    def __bool__(self):
        """A vector is falsy only when its norm is zero."""
        return bool(abs(self))

    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component, or a new instance for a slice.

        Raises:
            TypeError: if *index* is neither a slice nor an integer.
        """
        cls = type(self)
        if isinstance(index, slice):
            # Wrap the sliced array in the same class instead of leaking
            # the underlying array to the caller.
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{.__name__} indices must be integers'
            raise TypeError(msg.format(cls))

    # Single-letter attribute names mapped, by position, to components 0..3.
    shortcut_names = 'xyzt'

    def __getattr__(self, name):
        """Resolve ``v.x``, ``v.y``, ``v.z``, ``v.t`` to components 0..3.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_names.find(name)
            # find() returns -1 for letters that are not shortcuts.
            if 0 <= pos < len(self._components):
                return self._components[pos]

        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def angle(self, n):
        """Return the *n*-th angular coordinate, in radians.

        The angle is ``atan2(r, self[n-1])`` where ``r`` is the norm of the
        components from index *n* onward. For the last angle, a negative
        final component maps the result to ``2*pi - a``.
        """
        r = math.sqrt(sum(x*x for x in self[n:]))
        a = math.atan2(r, self[n-1])
        if (n == len(self) - 1) and (self[-1] < 0):
            return math.pi * 2 - a
        else:
            return a

    def angles(self):
        """Return a generator of ``angle(n)`` for n in 1..len(self)-1."""
        return (self.angle(n) for n in range(1, len(self)))

    def __format__(self, fmt_spec=''):
        """Format each coordinate with *fmt_spec* and join them with ', '.

        A spec ending in ``'h'`` selects the magnitude followed by the
        values from ``angles()``, wrapped in ``<...>``; the ``'h'`` itself
        is stripped before formatting. Any other spec formats the plain
        components, wrapped in ``(...)``.
        """
        if fmt_spec.endswith('h'):
            fmt_spec = fmt_spec[:-1]
            coords = itertools.chain([abs(self)], self.angles())
            outer_fmt = '<{}>'
        else:
            coords = self
            outer_fmt = '({})'

        components = (format(c, fmt_spec) for c in coords)
        return outer_fmt.format(', '.join(components))

    @classmethod
    def frombytes(cls, octets):
        """Build an instance from bytes in the layout produced by __bytes__."""
        # The first byte names the typecode; the rest is the array payload.
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)


# Backward-compatible alias for the original (misspelled) class name.
Vecotr = Vector

```
Vector类的构造方法接受一个可迭代的对象，这与内置的序列类型一样。Vector的行为之所以像序列，是因为实现了__getitem__和__len__方法，这是鸭子类型语言使用的非正式接口。

my_seq[a:b:c]句法的工作原理：创建slice(a, b, c)对象，交给__getitem__方法处理。了解这一点之后，我们让Vector正确处理切片，像符合Python风格的序列那样返回新的Vector实例。

实现__hash__方法特别适合使用functools.reduce函数，因为我们要把异或运算符^依次应用到各个分量的散列值上，生成整个向量的聚合散列值。在__hash__方法中使用reduce函数之后，我们又使用内置的归约函数all实现了效率更好的__eq__方法。
```

## 延伸阅读

**把协议当作非正式的接口**

协议不是Python发明的。Smalltalk团队，也就是“面向对象”的发明者，使用“协议”这个词表示我们现在称之为接口的特性。

动态类型语言中的既定协议会自然进化。所谓动态类型是指在运行时检查类型，因为方法签名和变量没有静态类型信息。Ruby是一门重要的面向对象动态类型语言，它也使用协议。

Python文档中，如果看到“类似文件的对象”（file-like object）这样的表述，通常说的是协议。这是一种简短的说法，意思是：“行为基本和文件一致，实现了部分文件的接口，满足上下文相关需求的东西”。