In [52]:
class Array(object):
    def __init__(self,size=32,init=None):
        self._size=size
        self._items=[init]*size
        
    def __getitem__(self,index):
        return self._items[index]
    
    def __setitem__(self,index,value):
        self._items[index]=value
        
    def __len__(self):
        return self._size
    
    def clear(self,value=None):
        for i in range(len(self._items)):
            self._items[i]=value
            
    def __iter__(self):
        for item in self._items:
            yield item
            
class Slot(object):
    def __init__(self,key,value):
        self.key,self.value=key,value
        
class HashTable(object):
    UNUSED=None
    EMPTY=Slot(None,None)
    
    def __init__(self):
        self._table=Array(8,init=HashTable.UNUSED)  # 定长数组
        self.length=0 # 实际占用的槽数
        
    @property
    def _load_factor(self):
        return self.length / float(len(self._table))
    
    def __len__(self):
        return self.length
    
    def _hash(self,key):
        return abs(hash(key)) % len(self._table)   # 返回键-值所在slot的index
    
    def _find_key(self,key):
        index=self._hash(key)
        _len=len(self._table)
        while self._table[index] is not HashTable.UNUSED:
            if self._table[index] is HashTable.EMPTY:
                index=(index*5+1) % _len
                continue
            elif self._table[index].key==key:
                return index
            else:
                index=(index*5+1) % _len
        return None
    
    def _slot_can_insert(self,index):
        return (self._table[index] is HashTable.EMPTY or self._table[index] is HashTable.UNUSED)
    
    def _find_slot_for_insert(self,key):
        index=self._hash(key)
        _len=len(self._table)
        while not self._slot_can_insert(index):
            index=(index*5+1) % _len
        return index
    
    def __contains__(self,key):
        index=self._find_key(key)
        return index is not None
    
    def add(self,key,value):
        if key in self:
            index=self._find_key(key)
            self._table[index].value=value
            return False
        else:
            index=self._find_slot_for_insert(key)
            self._table[index]=Slot(key,value)
            self.length+=1
            if self._load_factor>=0.8:
                self._rehash()
            return True
        
    def _rehash(self):
        old_table=self._table
        newsize=len(self._table)*2
        self._table=Array(newsize,HashTable.UNUSED)
        self.length=0
        for slot in old_table:
            if slot is not HashTable.UNUSED and slot is not HashTable.EMPTY:
                index=self._find_slot_for_insert(slot.key)
                self._table[index]=slot
                self.length+=1
                
    def get(self,key,default=None):
        index=self._find_key(key)
        if index is None:
            return default
        else:
            return self._table[index].value
        
    def remove(self,key):
        index=self._find_key(key)
        if index is None:
            raise KeyError()
        value=self._table[index].value
        self.length-=1
        self._table[index]=HashTable.EMPTY
        return value
    
    def __iter__(self):
        for slot in self._table:
            if slot not in (HashTable.EMPTY,HashTable.UNUSED):
                yield slot.key
                
def test_hash_table():
    h = HashTable()
    h.add('a', 0)
    h.add('b', 1)
    h.add('c', 2)
    assert len(h) == 3
    assert h.get('a') == 0
    assert h.get('b') == 1
    assert h.get('hehe') is None

    h.remove('a')
    assert h.get('a') is None
    assert sorted(list(h)) == ['b', 'c']

    n = 50
    for i in range(n):
        h.add(i, i)

    for i in range(n):
        assert h.get(i) == i
        

In [53]:
abs(hash(3)) % 8

3

In [54]:
test_hash_table()

In [57]:
class DictADT(HashTable):
    def _iter_slot(self):
        for slot in self._table:
            if slot not in (HashTable.EMPTY, HashTable.UNUSED):
                yield slot
    
    def __setitem__(self,key,value):
        self.add(key,value)
        
    def __getitem__(self,key):
        if key not in self:
            raise KeyError()
        else:
            return self.get(key)
        
    def items(self):
        for slot in self._iter_slot():
            yield (slot.key,slot.value)

            
    def keys(self):
        for slot in self._iter_slot():
            yield slot.key
            
    def values(self):
        for slot in self._iter_slot():
            yield slot.value
            
            
def test_dict_adt():
    import random
    d = DictADT()
    '''
    作为dict的key必须是可哈希的，也就是说不能是可变对象
    An object is hashable if it has a hash value which never changes during its lifetime (it needs a __hash__() method), 
    and can be compared to other objects (it needs an __ eq__() or __cmp__(). 
    Hashable objects which compare equal must have the same hash value.
    
    一个对象能被称为 hashable ，
    它必须有个 hash 值，这个值在整个生命周期都不会变化，
    而且必须可以进行相等比较，所以一个对象可哈希，它必须实现__hash__() 与 __eq__() 方法。
    python中所有不可改变的的对象（imutable objects）都是可哈希的，比如字符串，元组，也就是说可改变的容器如字典，列表，集合不可哈希（unhashable）

    '''

    d['a'] = 1
    assert d['a'] == 1
    d.remove('a')

    l = list(range(30))
    random.shuffle(l)
    for i in l:
        d.add(i, i)
    
    for i in range(30):
        assert d.get(i) == i

    assert sorted(list(d.keys())) == sorted(l)

In [58]:
test_dict_adt()

In [115]:
class SetADT(HashTable):
    def add(self,key):
        return super(SetADT,self).add(key,True)
    
    def __and__(self,other_set):
        new_set=SetADT()
        for element_a in self:
            if element_a in other_set:
                new_set.add(element_a)
        return new_set
    
    def __sub__(self,other_set):
        new_set=SetADT()
        for element_a in self:
            if element_a not in other_set:
                new_set.add(element_a)
        return new_set
    
    def __or__(self,other_set):
        new_set=SetADT()
        for element_a in self:
            new_set.add(element_a)
        for element_b in other_set:
            new_set.add(element_b)
        return new_set
    
    def __xor__(self,other_set):
        new_set=SetADT()
        for element_a in self:
            if element_a not in other_set:
                new_set.add(element_a)
        for element_b in other_set:
            if element_b not in self:
                new_set.add(element_b)
        return new_set
    
    def remove(self,key):
        return super(SetADT,self).remove(key)
    
    
def test_set_adt():
    sa = SetADT()
    sa.add(1)
    sa.add(2)
    sa.add(3)
    assert 1 in sa    # 测试  __contains__ 方法，实现了 add 和 __contains__，集合最基本的功能就实现啦

    sb = SetADT()
    sb.add(3)
    sb.add(4)
    sb.add(5)
    print(list(sa))
    print(list(sb))
    assert sorted(list(sa & sb)) == [3]
    assert sorted(list(sa - sb)) == [1, 2]
    assert sorted(list(sa | sb)) == [1, 2, 3, 4, 5]
    assert sorted(list(sa ^ sb)) == [1,2,4,5]
    sa.remove(1)
    assert sorted(list(sa))==[2,3]
    sa.pop()
    print(list(sa))
        

In [116]:
test_set_adt()

[1, 2, 3]
[3, 4, 5]
[2, 3]


In [105]:
hash(1)

1