# Hash Tables

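Lookup, insertion, and deletion run in O(1) time on average (as long as the number of entries stays small relative to the array size, since the array is never resized). The table is implemented as a fixed-size array of buckets, using separate chaining with singly-linked lists to deal with collisions.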

In [1]:
import mmh3 # https://pypi.org/project/mmh3

In [2]:
class Node:
    """A single key/value entry in a singly-linked chain."""

    def __init__(self, key, value):
        # A new node starts detached (``next`` is None); whoever creates it
        # is responsible for linking it into a chain.
        self.key, self.value, self.next = key, value, None

        
class Bucket:
    """Hash-table bucket: a singly-linked chain hanging off a sentinel head.

    The bucket object itself plays the role of the head sentinel: it only
    carries a ``next`` attribute, which references the first stored node
    (or ``None`` while the bucket is empty). Using a sentinel means that
    insertion and deletion never need a special case for the first node.
    """

    def __init__(self):
        self.next = None

    def _nodes(self):
        """Yield every stored node, from the first to the last."""
        node = self.next
        while node is not None:
            yield node
            node = node.next

    def insert(self, key, value):
        """Store ``value`` under ``key``.

        If ``key`` is already present its value is overwritten in place;
        otherwise a new node is appended at the end of the chain.
        """
        tail = self
        for node in self._nodes():
            if node.key == key:
                node.value = value
                return
            tail = node
        # ``tail`` is the last node, or the sentinel when the chain is empty.
        tail.next = Node(key, value)

    def lookup(self, key):
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` is not in this bucket.
        """
        for node in self._nodes():
            if node.key == key:
                return node.value
        raise KeyError(key)

    def delete(self, key):
        """Unlink the node holding ``key`` from the chain.

        Raises:
            KeyError: if ``key`` is not in this bucket.
        """
        prev = self
        while prev.next is not None:
            if prev.next.key == key:
                # Bypass the matching node; it becomes unreachable.
                prev.next = prev.next.next
                return
            # Step forward. Without this the loop would spin forever as
            # soon as the first node of the chain does not match ``key``.
            prev = prev.next
        raise KeyError(key)

    def __str__(self):
        """Render the chain as ``[key = value] → ... → ◇`` (◇ marks the end)."""
        cells = ['[%r = %r] → ' % (node.key, node.value) for node in self._nodes()]
        return ''.join(cells) + '◇'

    __repr__ = __str__
        

def _hash(x, size):
    """Hash basic types."""
    if isinstance(x, str):
        return mmh3.hash(x) % size
    if isinstance(x, int):
        return mmh3.hash(hex(x)) % size
    if isinstance(x, float):
        return mmh3.hash(x.hex()) % size
    if isinstance(x, (list, tuple, range)):
        return mmh3.hash(''.join(str(_hash(k, size)) for k in x)) % size


class HashTable:
    """Fixed-size hash table using separate chaining.

    Keys are mapped to one of ``size`` buckets by ``_hash``; each bucket is
    a ``Bucket`` that resolves collisions by chaining. The array is never
    resized, so chains grow as more entries are stored.

    Supports ``table[key]``, ``table[key] = value`` and ``del table[key]``.
    Reading or deleting a missing key raises ``KeyError``.
    """

    def __init__(self, size=256):
        """Create an empty table with ``size`` buckets.

        Raises:
            ValueError: if ``size`` is smaller than 1. Such a table could
                never store anything: every access would fail with a
                ZeroDivisionError or IndexError.
        """
        if size < 1:
            raise ValueError('size must be a positive integer, got %r' % (size,))
        self._array = [Bucket() for _ in range(size)]
        self._size = size

    def _bucket(self, key):
        """Return the bucket responsible for ``key``."""
        return self._array[_hash(key, self._size)]

    def __getitem__(self, key):
        return self._bucket(key).lookup(key)

    def __setitem__(self, key, value):
        self._bucket(key).insert(key, value)

    def __delitem__(self, key):
        self._bucket(key).delete(key)

    def __str__(self):
        """Render one line per bucket: right-aligned index, ``|``, the chain."""
        return '\n'.join(
            '{:>3d} | {}'.format(i, bucket) for i, bucket in enumerate(self._array)
        )

    __repr__ = __str__

In [3]:
# Use a small table (16 buckets) so the printed layout stays readable.
h = HashTable(16)

In [4]:
# Looking up a key that was never inserted raises KeyError carrying the key.
try:
    h[3]
except KeyError as e:
    print(e)

3


In [5]:
# Deleting a key that was never inserted raises KeyError as well.
try:
    del h[4]
except KeyError as e:
    print(e)

4


In [6]:
# Insert six integer keys; each one goes to the bucket selected by _hash.
h[1] = 11
h[2] = 22
h[3] = 33
h[4] = 44
h[5] = 55
h[6] = 66

In [7]:
# Read all six values back.
h[1], h[2], h[3], h[4], h[5], h[6]

(11, 22, 33, 44, 55, 66)

In [8]:
# Remove the entry stored under key 1.
del h[1]

In [9]:
# The deleted key is gone: looking it up raises KeyError again.
try:
    h[1]
except KeyError as e:
    print(e)

1


In [10]:
# Dump the table: one line per bucket index, followed by its chain (◇ ends a chain).
print(h)

  0 | ◇
  1 | [2 = 22] → ◇
  2 | [3 = 33] → ◇
  3 | [5 = 55] → ◇
  4 | ◇
  5 | ◇
  6 | ◇
  7 | ◇
  8 | [6 = 66] → ◇
  9 | ◇
 10 | ◇
 11 | ◇
 12 | [4 = 44] → ◇
 13 | ◇
 14 | ◇
 15 | ◇


In [11]:
# Sequence keys (list, tuple, range) are supported: _hash hashes them element by element.
h[['a', 1, True]] = ('b', 2, False)

In [12]:
# ['a', 1, 1] finds the entry stored under ['a', 1, True]: True == 1, so the
# two lists compare equal, and _hash treats a bool as an int (hex(True) == hex(1)).
h[['a', 1, 1]]

('b', 2, False)

In [13]:
# Float keys are hashed from their hexadecimal string form (float.hex()).
h[3.14] = 'pi'
h[2.718] = 'e'

In [14]:
# Dump the table again: keys that collide with an occupied bucket are
# appended to the end of that bucket's chain.
print(h)

  0 | ◇
  1 | [2 = 22] → ◇
  2 | [3 = 33] → ◇
  3 | [5 = 55] → [2.718 = 'e'] → ◇
  4 | ◇
  5 | ◇
  6 | ◇
  7 | ◇
  8 | [6 = 66] → ◇
  9 | ◇
 10 | ◇
 11 | ◇
 12 | [4 = 44] → ◇
 13 | ◇
 14 | [['a', 1, True] = ('b', 2, False)] → [3.14 = 'pi'] → ◇
 15 | ◇
