In [1]:
%autosave 0
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is deprecated.
from IPython.display import HTML, display
# Stretch elements with the CSS class `container` to the full width.  The
# `!important` flag has to precede the semicolon to belong to the declaration.
display(HTML('<style>.container { width:100% !important; } </style>'))

Autosave disabled


# Hash Tables (Open Hashing)

Given a word $w$ and the size $n$ of the hash table, the function $\texttt{hash_code}(w, n)$ calculates the hash code of $w$.  For a word 
$w = c_0c_1\cdots c_{m-1}$ of length $m$, this function is defined as follows:
$$ \texttt{hash_code}(w, n) = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n  $$
Computing the powers $128^i$ explicitly would produce very large intermediate numbers.  To avoid this, we define the partial sums $s_k$ for
$k=0,1,\cdots,m-1$ by induction: 
  - $s_{0} = \texttt{ord}(c_{m-1}) \;\texttt{%}\; n$,
  - $s_{k+1} = \bigl(s_k \cdot 128 + \texttt{ord}(c_{m-2-k}) \bigr) \;\texttt{%}\; n$ for $k = 0, \cdots, m-2$.

Then we have
$$ s_{m-1} = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n. $$

In [2]:
def hash_code(w, n):
    """
    Compute the hash code of the word `w` for a hash table of size `n`.

    The characters of `w` are treated as the digits of a number in base 128,
    where `w[0]` is the least significant digit.  The result is this number
    modulo `n`.  The empty word has the hash code 0.
    """
    code = 0
    # Horner's scheme: start with the last character (the most significant
    # digit) and reduce modulo n in every step to keep the numbers small.
    for char in reversed(w):
        code = (code * 128 + ord(char)) % n
    return code

Let us test this function.

In [3]:
hash_code('George W. Bush', 6761)

4187

In [4]:
class HashTable:
    """
    Hash table that resolves collisions by chaining: every slot of `mArray`
    holds a list of `(key, value)` pairs.

    The operations on the table are written as plain functions in the
    following cells and are attached to this class after their definition.
    """
    def __init__(self, n):
        """Create an empty table with `n` buckets."""
        self.mSize    = n    # number of buckets
        self.mEntries = 0    # number of (key, value) pairs currently stored
        # One separate list object per bucket.
        self.mArray   = [ list() for _ in range(n) ]
        # `insert` grows the table once mEntries >= mSize * mAlpha.
        self.mAlpha   = 2

Hash tables work best if their size is a prime number.  Therefore, the variable `Primes` stores a list of prime numbers.  
These numbers are organized so that `Primes[i+1]` is roughly twice as big as `Primes[i]`.

In [5]:
# Candidate table sizes in ascending order.  Every entry lies just below a
# power of two (2**2 up to 2**31), hence each one is roughly twice as big as
# its predecessor.
HashTable.Primes = [
    3, 7, 13, 31, 61, 127, 251, 509,
    1021, 2039, 4093, 8191, 16381, 32749, 65521,
    131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
    16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
    1073741789, 2147483647,
]

In [6]:
def find(self, key):
    """
    Look up `key` and return the value that is stored with it.

    Returns `None` if the table has no entry for `key`.
    """
    # Only the bucket selected by the hash code can contain the key.
    bucket = self.mArray[hash_code(key, self.mSize)]
    return next((value for k, value in bucket if k == key), None)

HashTable.find = find

In [7]:
def insert(self, key, value):
    """
    Associate `value` with `key`.

    If `key` is already stored, its value is overwritten and neither the
    number of entries nor the size of the table changes.  Otherwise a new
    entry is appended to the bucket of `key`.  Before a new entry is added,
    the table is enlarged via `self._rehash()` if
    `self.mEntries >= self.mSize * self.mAlpha`; in this case a line showing
    `mEntries` and `mSize` is printed first.
    """
    aList = self.mArray[hash_code(key, self.mSize)]
    for i, (k, _) in enumerate(aList):
        if k == key:
            aList[i] = (key, value)
            return
    # The key is new.  Only now check the load: overwriting an existing
    # entry does not add anything and therefore must not grow the table.
    if self.mEntries >= self.mSize * self.mAlpha:
        print('mEntries =', self.mEntries, 'mSize =', self.mSize)
        self._rehash()
        # Rehashing replaced the buckets, so the bucket has to be located again.
        aList = self.mArray[hash_code(key, self.mSize)]
    self.mEntries += 1
    aList.append((key, value))
    
HashTable.insert = insert

In [8]:
def _rehash(self):
    """
    Resize the table and redistribute all entries.

    The new size is the first entry `p` of `HashTable.Primes` that satisfies
    `p * self.mAlpha > self.mEntries`.  All `(key, value)` pairs are moved
    into a fresh list of `p` buckets; `self.mEntries` is not changed.
    If no entry of `HashTable.Primes` is big enough, the table is left as it is.
    """
    prime = next((p for p in HashTable.Primes if p * self.mAlpha > self.mEntries), None)
    if prime is None:
        # There is no suitable size.  Keep the current buckets instead of
        # failing on an unbound variable.
        return
    # Fill the new buckets directly.  Going through `insert` of a temporary
    # table would apply that table's own load factor and could resize it
    # again, so that its bucket list no longer matches `prime`.
    # The keys of a table are distinct, hence no duplicate check is needed.
    buckets = [[] for _ in range(prime)]
    for aList in self.mArray:
        for k, v in aList:
            buckets[hash_code(k, prime)].append((k, v))
    self.mSize  = prime
    self.mArray = buckets
    
HashTable._rehash = _rehash

In [9]:
def delete(self, key):
    """
    Remove the entry for `key` from the table.

    Nothing happens if `key` is not stored in the table.
    """
    bucket = self.mArray[hash_code(key, self.mSize)]
    # Position of the key inside its bucket, or None if it is absent.
    position = next((i for i, (k, _) in enumerate(bucket) if k == key), None)
    if position is not None:
        del bucket[position]
        self.mEntries -= 1

HashTable.delete = delete

In [10]:
def __repr__(self):
    """
    Return a textual picture of the table: one line per bucket, consisting
    of the bucket index, a colon, and the list of pairs in this bucket.
    """
    return ''.join(f'{index}: {bucket}\n' for index, bucket in enumerate(self.mArray))

HashTable.__repr__ = __repr__

In [11]:
t = HashTable(3)
t.insert('Adrian', 8)
t

0: [('Adrian', 8)]
1: []
2: []

In [12]:
t.insert('Benjamin', 24)
t

0: [('Adrian', 8), ('Benjamin', 24)]
1: []
2: []

In [13]:
t.insert('Bereket', 1)
t

0: [('Adrian', 8), ('Benjamin', 24)]
1: [('Bereket', 1)]
2: []

In [14]:
t.insert('Christian', 13)
t

0: [('Adrian', 8), ('Benjamin', 24)]
1: [('Bereket', 1)]
2: [('Christian', 13)]

In [15]:
t.insert('Christian', 14)
t

0: [('Adrian', 8), ('Benjamin', 24)]
1: [('Bereket', 1)]
2: [('Christian', 14)]

In [16]:
t.find('Adrian'), t.find('Christian'), t.find('Benjamin')

(8, 14, 24)

In [17]:
t.insert('David', 22)
t

0: [('Adrian', 8), ('Benjamin', 24), ('David', 22)]
1: [('Bereket', 1)]
2: [('Christian', 14)]

In [18]:
t.insert('Ephraim', 19)
t

0: [('Adrian', 8), ('Benjamin', 24), ('David', 22), ('Ephraim', 19)]
1: [('Bereket', 1)]
2: [('Christian', 14)]

In [19]:
t.insert('Erwin', 26)
t

mEntries = 6 mSize = 3


0: []
1: [('Christian', 14)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1)]
6: [('Ephraim', 19), ('Erwin', 26)]

In [20]:
t.insert('Felix', 4)
t

0: []
1: [('Christian', 14)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [21]:
t.insert('Florian', 9)
t

0: []
1: [('Christian', 14)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [22]:
t.insert('Giorgio', 20)
t

0: []
1: [('Christian', 14), ('Giorgio', 20)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [23]:
t.insert('Jan', 7)
t

0: []
1: [('Christian', 14), ('Giorgio', 20), ('Jan', 7)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [24]:
t.insert('Janis', 16)
t

0: [('Janis', 16)]
1: [('Christian', 14), ('Giorgio', 20), ('Jan', 7)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [25]:
t.insert('Josia', 18)
t

0: [('Janis', 16)]
1: [('Christian', 14), ('Giorgio', 20), ('Jan', 7)]
2: []
3: [('David', 22)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9), ('Josia', 18)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [26]:
t.insert('Kai', 3)
t

0: [('Janis', 16)]
1: [('Christian', 14), ('Giorgio', 20), ('Jan', 7)]
2: []
3: [('David', 22), ('Kai', 3)]
4: [('Adrian', 8)]
5: [('Benjamin', 24), ('Bereket', 1), ('Florian', 9), ('Josia', 18)]
6: [('Ephraim', 19), ('Erwin', 26), ('Felix', 4)]

In [27]:
t.insert('Lars', 21)
t

mEntries = 14 mSize = 7


0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4)]
3: [('Lars', 21)]
4: []
5: []
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [28]:
t.insert('Lucas', 0)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0)]
3: [('Lars', 21)]
4: []
5: []
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [29]:
t.insert('Marcel', 5)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0)]
3: [('Lars', 21)]
4: []
5: []
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [30]:
t.insert('Marius', 6)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [31]:
t.insert('Markus', 17)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [32]:
t.insert('Matthias', 10)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [33]:
t.insert('Nick', 11)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [34]:
t.insert('Patrick', 23)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11)]
7: [('Janis', 16)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [35]:
t.insert('Petra', 27)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11)]
7: [('Janis', 16), ('Petra', 27)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [36]:
t.insert('Rene', 15)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: []
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11), ('Rene', 15)]
7: [('Janis', 16), ('Petra', 27)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [37]:
t.insert('Sebastian', 25)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: [('Sebastian', 25)]
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11), ('Rene', 15)]
7: [('Janis', 16), ('Petra', 27)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [38]:
t.insert('Stefan', 2)
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: [('Sebastian', 25)]
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11), ('Rene', 15), ('Stefan', 2)]
7: [('Janis', 16), ('Petra', 27)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: [('Adrian', 8)]
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [39]:
t.delete('Adrian')
t

0: []
1: [('Giorgio', 20), ('David', 22), ('Benjamin', 24), ('Matthias', 10), ('Patrick', 23)]
2: [('Kai', 3), ('Erwin', 26), ('Felix', 4), ('Lucas', 0), ('Markus', 17)]
3: [('Lars', 21)]
4: [('Sebastian', 25)]
5: [('Marius', 6)]
6: [('Ephraim', 19), ('Nick', 11), ('Rene', 15), ('Stefan', 2)]
7: [('Janis', 16), ('Petra', 27)]
8: [('Christian', 14), ('Jan', 7), ('Bereket', 1), ('Marcel', 5)]
9: []
10: [('Florian', 9), ('Josia', 18)]
11: []
12: []

In [40]:
t.delete('Adrian')
t.delete('Benjamin')
t.delete('Bereket')
t.delete('Christian')
t.delete('Christian')
t.delete('David')
t.delete('Ephraim')
t.delete('Erwin')
t.delete('Felix')
t.delete('Florian')
t.delete('Giorgio')
t.delete('Jan')
t.delete('Janis')
t.delete('Josia')
t.delete('Kai')
t.delete('Lars')
t.delete('Lucas')
t.delete('Marcel')
t.delete('Marius')
t.delete('Markus')
t.delete('Matthias')
t.delete('Nick')
t.delete('Patrick')
t.delete('Petra')
t.delete('Rene')
t.delete('Sebastian')
t.delete('Stefan')
t

0: []
1: []
2: []
3: []
4: []
5: []
6: []
7: []
8: []
9: []
10: []
11: []
12: []