## 2.5 哈希表

负载因子（load factor）：$\alpha = \frac{n}{m}$，其中 $n$ 是元素个数，$m$ 是数组长度。

在 Java 的 `HashMap` 源码中，默认的负载因子阈值为 0.75：当元素个数超过「数组长度 × 0.75」时，哈希表会进行扩容。

### 2.5.1 拉链法


In [None]:
from operator import itruediv
from typing import Any


# 拉链法
class HashTable:
    class Entry:
        hash_code: int
        key: Any
        value: Any
        next_entry: "Entry"

        def __init__(self, hash_code: int, key: Any, value: Any, next_entry: "Entry" = None):
            self.hash_code = hash_code
            self.key = key
            self.value = value
            self.next_entry = next_entry

    table: list[Entry | None]
    size: int
    max_load: float

    def __init__(self, table_size: int = 16, max_load: float = 0.75):
        self.table = [None for _ in range(table_size)]
        self.max_load = max_load

    @property
    def load_factor(self):
        return self.size / len(self.table)

    @property
    def threshold(self):
        return self.max_load * len(self.table)

    # 求模运算替换为按位与运算，必须要求数组长度是2的n次方
    def get(self, hash_code: int, key: Any):
        # 1 获取表格索引
        table_index = hash_code & (self.size - 1)
        # 2 如果对应索引处没有链表，返回空
        if not self.table[table_index]:
            return None
        # 3 否则遍历链表直至找到目标值
        traval: "HashTable.Entry" = table_index[table_index]
        while traval:  # type:HashTable.Entry
            if traval.key == key:
                return traval.value
            traval = traval.next_entry
        return None

    def put(self, hash_code: int, key: Any, value: Any):
        # 1 获取表格索引
        table_index = hash_code & (self.size - 1)
        # 2 如果对应索引处没有链表，建立链表头
        if not self.table[table_index]:
            self.table[table_index] = HashTable.Entry(hash_code, key, value)
            return
        # 3 如果对应索引处有链表，插入到末尾
        traval: "HashTable.Entry" = table_index[table_index]
        while traval.next_entry:  # type:HashTable.Entry
            # 3.1 若是有相同 key，做值的替换，退出循环返回结果
            if traval.key == key:
                traval.value = value
                return
            # 3.2 不断遍历到最后一个空位
            traval = traval.next_entry
        traval.next_entry = HashTable.Entry(hash_code, key, value)
        self.size += 1
        if self.size > self.threshold:
            self.resize()

    def remove(self, hash_code: int, key: Any):
        hash(hash_code)
        # 1 获取表格索引
        table_index = hash_code & (self.size - 1)
        # 2 如果对应索引处没有链表，返回空
        if not self.table[table_index]:
            return None
        # 3 找指定的 key
        prev = None
        cur = self.table[table_index]
        while cur:
            if cur.key == key:
                if not prev:
                    self.table[table_index] = cur.next_entry
                else:
                    prev.next_entry = cur.next_entry
                self.size -= 1
                return cur.value
            prev = cur
            cur = cur.next_entry
        return None

    def resize(self):
        new_table = [None for _ in range(len(self.table) << 2)]
        for index, e in enumerate(self.table):
            if e:
                left, right = None, None
                left_head, right_head = None, None  # 左：新数组index位置；右：新数组index+len(旧数组)位置
                p = e
                while p:
                    if p.key & len(self.table) == 0:
                        if not left_head:
                            left_head = p
                        else:
                            left.next_entry = p
                        left = p  # 移动到下一个元素
                    else:
                        if not right_head:
                            right_head = p
                        else:
                            right.next_entry = p
                        right = p  # 移动到下一个元素
                if left:
                    left.next_entry = None
                    new_table[index] = left_head
                if right:
                    right.next_entry = None
                    new_table[index + len(self.table)] = right_head
        self.table = new_table

In [5]:
# NOTE: str hashes are salted per interpreter process (see PYTHONHASHSEED),
# so the value this prints differs between runs unless the seed is fixed.
hash("abc")

-1615247710174026067

In [7]:
from typing import List


# LeetCode 01
def twoSum(self, nums: List[int], target: int) -> List[int]:
    """Return the indices of two entries of ``nums`` that sum to ``target``.

    A single pass records each value's index; for every element the
    complement ``target - value`` is looked up among the values seen so far.
    Returns ``None`` when no such pair exists.
    """
    seen = {}
    for position, value in enumerate(nums):
        complement = target - value
        if complement in seen:
            return [seen[complement], position]
        seen[value] = position
    return None

twoSum(None, [3,3], 6)

[0, 1]

In [8]:
from typing import List
# LeetCode 3
def lengthOfLongestSubstring(self, s: str) -> int:
    """Return the length of the longest substring of ``s`` without repeated characters.

    Sliding window: ``begin`` is the left edge of the current duplicate-free
    window and ``last_seen`` maps each character to its most recent index.
    Returns 0 for an empty string.
    """
    last_seen = {}
    begin = 0
    max_len = 0  # an empty string has no substring, so the answer starts at 0
    for index, ch in enumerate(s):
        if ch in last_seen:
            # Jump past the previous occurrence, but never move the window
            # start backwards (the earlier occurrence may already be outside it).
            begin = max(begin, last_seen[ch] + 1)
        last_seen[ch] = index
        max_len = max(max_len, index - begin + 1)
    return max_len

lengthOfLongestSubstring(None, "abba")

2

In [8]:
from typing import List

# LeetCode 49
def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
    """Group the words of ``strs`` that are anagrams of one another.

    Words are bucketed by their sorted-character signature; groups appear in
    order of first occurrence and keep the input order within each group.
    """
    groups = {}
    for word in strs:
        signature = "".join(sorted(word))
        groups.setdefault(signature, []).append(word)
    return list(groups.values())

groupAnagrams(None, ["eat","tea","tan","ate","nat","bat"])

[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]

In [3]:
# LeetCode 217
def containsDuplicate(self, nums: List[int]) -> bool:
    """Return True if any value occurs more than once in ``nums``."""
    seen = set()
    for value in nums:
        if value in seen:
            # Stop at the first repeat.
            return True
        seen.add(value)
    return False

'aabccc'

In [9]:
# LeetCode 136
def singleNumber(self, nums: List[int]) -> int:
    """Return the XOR of all values in ``nums``.

    Values that occur an even number of times cancel out under XOR, so when
    every value but one appears twice the result is that single value.
    Raises ``IndexError`` for an empty list.
    """
    result = nums[0]
    for value in nums[1:]:
        result ^= value
    return result

singleNumber(None, [1,2,3,4,5,6,1,2,3,4,5])

6

In [None]:
from collections import Counter
# LeetCode 242
def isAnagram(self, s: str, t: str) -> bool:
    """Return True if ``t`` uses exactly the same characters as ``s``, with the same counts."""
    if len(s) != len(t):
        # Strings of different length cannot have equal character counts.
        return False
    source_counts = Counter(s)
    target_counts = Counter(t)
    return source_counts == target_counts

In [None]:
from collections import Counter
# LeetCode 387
def firstUniqChar(self, s: str) -> int:
    """Return the index of the first character occurring exactly once in ``s``, or -1."""
    occurrences = Counter(s)
    unique_positions = (
        position for position, ch in enumerate(s) if occurrences[ch] == 1
    )
    # The generator is lazy, so scanning stops at the first unique character.
    return next(unique_positions, -1)

In [19]:
# LeetCode 819
import re
from collections import Counter
def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
    """Return the most frequent word in ``paragraph`` that is not in ``banned``.

    The paragraph is lower-cased and split into runs of ASCII letters, so
    matching is case-insensitive and punctuation is ignored.  Banned words
    are expected in lower case.  Returns an empty string when no word
    remains after filtering.
    """
    words = re.findall(r"[A-Za-z]+", paragraph.lower())
    counts = Counter(words)
    for ban in banned:
        # A banned word may not occur in the paragraph at all; ignore it then.
        counts.pop(ban, None)
    if not counts:
        return ""
    return counts.most_common(1)[0][0]

mostCommonWord(None, "Bob hit a ball, the hit BALL flew far after it was hit.", ["hit"])

ball
