## 6.2 哈希冲突

## 6.2.1 链式地址解决哈希冲突

In [71]:
class Pair:
    """A key/value entry stored in a hash-table bucket.

    The textual form is ``"<key>: <val>"`` and the hash is derived from the
    key alone, so the value never influences hashing.
    """

    def __init__(self, key, val):
        self.key, self.val = key, val

    def __str__(self):
        # Rendered as "key: val", which is how entries appear when a table is printed.
        return f"{self.key}: {self.val}"

    def __repr__(self):
        # Same text as __str__ so that lists of pairs print readably.
        return str(self)

    def __hash__(self):
        # Only the key takes part in hashing.
        return hash(self.key)


class SeperatedChainingHashTable(object):
    """Hash table that resolves collisions with separate chaining.

    Every bucket is a list of ``Pair`` entries whose keys hash to the same
    index. The table doubles its capacity once the load factor
    (entries / capacity) exceeds 2/3.
    """

    def __init__(self, capacity: int):
        """Create an empty table with ``capacity`` buckets.

        Raises:
            ValueError: if ``capacity`` is not a positive integer.
        """
        if capacity < 1:
            raise ValueError("capacity must be a positive integer")
        # Build one independent list per bucket. `[[]] * capacity` would be
        # wrong: it repeats a reference to a single shared list.
        self._bucket: list[list[Pair]] = [[] for _ in range(capacity)]
        self._capacity: int = capacity
        self._len: int = 0

    def __len__(self):
        """Return the number of stored entries."""
        return self._len

    def __str__(self):
        """Return the bucket array rendered as nested lists of pairs."""
        return str(self._bucket)

    def __repr__(self):
        return str(self._bucket)

    def hash_func(self, key) -> int:
        """Map ``key`` to a bucket index in ``[0, capacity)``."""
        return hash(key) % self._capacity

    def put(self, key, val):
        """Insert ``key`` with ``val``, or overwrite the value of an existing key."""
        chain = self._bucket[self.hash_func(key)]
        for pair in chain:
            if pair.key == key:
                # Existing key: update in place instead of appending a
                # duplicate that would inflate len() and shadow the new value.
                pair.val = val
                return
        chain.append(Pair(key, val))
        self._len += 1
        # Grow once the load factor exceeds 2/3.
        if self._len / self._capacity > 2 / 3:
            self.extend()

    def get(self, key):
        """Return the value stored for ``key``, or ``None`` if it is absent."""
        for pair in self._bucket[self.hash_func(key)]:
            if pair.key == key:
                return pair.val
        return None

    def remove(self, key):
        """Delete ``key`` and return its value, or ``None`` if it is absent."""
        chain = self._bucket[self.hash_func(key)]
        for position, pair in enumerate(chain):
            if pair.key == key:
                del chain[position]
                self._len -= 1
                return pair.val
        return None

    def extend(self):
        """Double the capacity and redistribute every entry."""
        old_buckets = self._bucket
        self._capacity *= 2
        self._bucket = [[] for _ in range(self._capacity)]
        # Rehash the existing pairs directly. Keys are already unique, so the
        # duplicate scan done by put() is unnecessary and the entry count
        # does not change.
        for chain in old_buckets:
            for pair in chain:
                self._bucket[self.hash_func(pair.key)].append(pair)

    def keys(self):
        """Return all keys, in bucket order."""
        return [pair.key for chain in self._bucket for pair in chain]

    def values(self):
        """Return all values, in bucket order."""
        return [pair.val for chain in self._bucket for pair in chain]

    def items(self):
        """Return all ``(key, value)`` tuples, in bucket order."""
        return [(pair.key, pair.val) for chain in self._bucket for pair in chain]

In [72]:
# Demo: build a 10-bucket chaining table and insert a few entries.
seperatedChainingHashTable = SeperatedChainingHashTable(capacity=10)
seperatedChainingHashTable.put(1, "张三")
seperatedChainingHashTable.put(2, "李四")
seperatedChainingHashTable.put(3, "王五")
seperatedChainingHashTable.put(4, "赵六")
# 11 and 21 land in the same bucket as 1 while the capacity is 10.
seperatedChainingHashTable.put(11, '离心')
seperatedChainingHashTable.put(21, '李白')
seperatedChainingHashTable.remove(21)
# seperatedChainingHashTable.get(11)
# len(seperatedChainingHashTable)
# Grow the table manually with extend(); the capacity doubles and entries are rehashed.
seperatedChainingHashTable.extend()
# Bare expression: the notebook displays the table's repr as the cell output.
seperatedChainingHashTable

[[], [1: 张三], [2: 李四], [3: 王五], [4: 赵六], [], [], [], [], [], [], [11: 离心], [], [], [], [], [], [], [], []]

In [73]:
# Pitfall demo: `[[]] * 10` repeats a reference to ONE inner list ten times,
# so an append through any index is visible at every index.
_bucket = [[]] * 10
_bucket[1].append(1)
_bucket

[[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]

In [74]:
# Correct form: the comprehension evaluates `[]` once per iteration, giving
# ten independent inner lists, so only index 1 receives the appended value.
_bucket = [[] for _ in range(10)]
_bucket[1].append(1)
_bucket

[[], [1], [], [], [], [], [], [], [], []]

## 6.2.2 开放寻址

### 1.线性探测

In [105]:
# Sentinel marking a lazily deleted bucket; both None and TOMBSTONE denote an empty bucket.
TOMBSTONE: str = 'TOMBSTONE'


class OpenAddressingHashTable(object):
    """Hash table that resolves collisions with open addressing.

    Each slot of the bucket array holds one of:

    * ``None``      - never used,
    * ``TOMBSTONE`` - lazily deleted, reusable for insertion,
    * a ``Pair``    - a live entry.

    The table doubles its capacity when the load factor (live entries /
    capacity) exceeds 0.5, or when an insertion finds no free slot along the
    probe sequence of its key.
    """

    def __init__(self, capacity: int):
        """Create an empty table with ``capacity`` slots.

        Raises:
            ValueError: if ``capacity`` is not a positive integer.
        """
        if capacity < 1:
            raise ValueError("capacity must be a positive integer")
        self._bucket: list[Pair | str | None] = [None] * capacity
        self._capacity: int = capacity
        self._len: int = 0

    def __len__(self):
        """Return the number of live entries (tombstones are not counted)."""
        return self._len

    def __str__(self):
        """Return the raw slot array, including ``None`` and tombstones."""
        return str(self._bucket)

    def __repr__(self):
        return str(self._bucket)

    def hash_func(self, key) -> int:
        """Map ``key`` to its home slot index in ``[0, capacity)``."""
        return hash(key) % self._capacity

    # Linear probing function f(i) = i + 2
    def linear_detect_func(self, index: int) -> int:
        """Return the slot that follows ``index`` under linear probing (step 2)."""
        return (index + 2) % self._capacity

    # Probing function
    def detect_func(self, index: int) -> int:
        """Return the next slot to probe after ``index``."""
        return self.linear_detect_func(index)

    @staticmethod
    def _is_live(entry) -> bool:
        """Tell whether a slot holds a real entry rather than None or a tombstone."""
        return entry is not None and entry != TOMBSTONE

    def _probe(self, key):
        """Yield the slots of ``key``'s probe sequence, at most one full cycle.

        A step of 2 on an even capacity only reaches half of the slots, so the
        walk stops when it comes back to the start (or after ``capacity``
        steps) instead of assuming an empty slot will eventually appear.
        """
        start = index = self.hash_func(key)
        for _ in range(self._capacity):
            yield index
            index = self.detect_func(index)
            if index == start:
                return

    def put(self, key, val):
        """Insert ``key`` with ``val``, or overwrite the value of an existing key."""
        # Grow when the load factor exceeds 0.5.
        if self._len / self._capacity > 0.5:
            self.extend()
        while True:
            free = None
            for index in self._probe(key):
                entry = self._bucket[index]
                if entry is None:
                    # End of the chain: the key is not in the table.
                    if free is None:
                        free = index
                    break
                if entry == TOMBSTONE:
                    # Remember the first reusable slot, but keep scanning: the
                    # key may still live further along the chain.
                    if free is None:
                        free = index
                elif entry.key == key:
                    entry.val = val
                    return
            if free is not None:
                break
            # Every reachable slot holds another key; grow and retry.
            self.extend()
        self._bucket[free] = Pair(key, val)
        self._len += 1

    # Find the slot storing the given key; return None if it is not present.
    def find_bucket(self, key) -> int | None:
        """Return the index of the slot holding ``key``, or ``None``."""
        for index in self._probe(key):
            entry = self._bucket[index]
            if entry is None:
                return None
            if entry != TOMBSTONE and entry.key == key:
                return index
        return None

    def get(self, key):
        """Return the value stored for ``key``, or ``None`` if it is absent."""
        index = self.find_bucket(key)
        if index is None:
            return None
        return self._bucket[index].val

    # Deletion must be lazy: clearing the slot would cut probe chains.
    def remove(self, key):
        """Delete ``key`` and return its value, or ``None`` if it is absent."""
        index = self.find_bucket(key)
        if index is None:
            return None
        removed = self._bucket[index]
        self._bucket[index] = TOMBSTONE  # lazy deletion
        self._len -= 1
        return removed.val

    def extend(self):
        """Double the capacity and re-insert every live entry.

        Tombstones are dropped during the rebuild.
        """
        old_slots = self._bucket
        self._capacity *= 2
        # Slots must start out as None; put() and find_bucket() treat any
        # other value as occupied.
        self._bucket = [None] * self._capacity
        self._len = 0
        for entry in old_slots:
            if self._is_live(entry):
                self.put(entry.key, entry.val)

    def keys(self):
        """Return one item per slot: the key of a live entry, else ``None``."""
        return [p.key if self._is_live(p) else None for p in self._bucket]

    def values(self):
        """Return one item per slot: the value of a live entry, else ``None``."""
        return [p.val if self._is_live(p) else None for p in self._bucket]

    def items(self):
        """Return one item per slot: ``(key, value)`` of a live entry, else ``None``."""
        return [(p.key, p.val) if self._is_live(p) else None for p in self._bucket]

In [106]:
# Demo: build a 10-slot open-addressing table and insert a few entries.
openAddressingHashTable = OpenAddressingHashTable(capacity=10)
openAddressingHashTable.put(1, "张三")
openAddressingHashTable.put(4, "李四")
openAddressingHashTable.put(5, "王五")
openAddressingHashTable.put(6, "赵六")
# 11 starts at slot 1 (occupied) and, with a probe step of 2, lands in slot 3.
openAddressingHashTable.put(11, "张三丰")
# 21 probes slots 1, 3 and 5 (all occupied) and lands in slot 7.
openAddressingHashTable.put(21, "张三风")
# Lazy deletion: slot 3 becomes a TOMBSTONE, so 21 in slot 7 stays reachable.
openAddressingHashTable.remove(11)
# openAddressingHashTable.get(21)
# len(openAddressingHashTable)
# Bare expression: the notebook displays the table's repr as the cell output.
openAddressingHashTable

[None, 1: 张三, None, 'TOMBSTONE', 4: 李四, 5: 王五, 6: 赵六, 21: 张三风, None, None]