# Hashtables


Hash tables are a data structure that allows us to store elements of any type
and find them again in constant time on average, using the following concepts:
* Store elements in an array
* Use a constant time **hash function** to compute for each element 
what its index in the array should be
* Use a **linked list in each entry** of the array to handle collisions 
(when two elements are assigned to the same index)

Suppose an array of size 10, each entry in the array is an empty linked list: 

| Array | - | - | - | - | - | - | - | - | - | - |
|-------|---|---|---|---|---|---|---|---|---|---|
| Index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |

* The hashing function is the **{element} modulo {size of the array}**
* Therefore, **given the size 10 the index of any element e is e%10**

Adding elements:
1. Calculate index of element 
2. Place the element at the head of the linked list stored at that index

In [None]:
class HashTable:
    """Fixed-size hash table of integers using separate chaining.

    ``internal_list`` holds 10 ``LinkedList`` buckets; an element ``d`` is
    stored in the bucket whose index is ``d`` modulo the number of buckets.
    Duplicates are allowed: every ``add`` stores another copy.
    """

    def __init__(self):
        # One (initially empty) linked list per slot of the internal array.
        self.internal_list = [LinkedList() for _ in range(10)]
        # Number of elements currently stored across all buckets.
        self.size = 0

    def hash(self, d):
        """Return the bucket index for ``d`` (``d`` modulo the bucket count)."""
        return d % len(self.internal_list)

    def add(self, d):
        """Store ``d`` at the head of its bucket and count it in ``size``."""
        i = self.hash(d)
        # LinkedList.insert takes (data, index) in that order; index 0 is
        # the head, which keeps insertion constant time.
        self.internal_list[i].insert(d, 0)
        self.size += 1

    def search(self, d):
        """Return True if ``d`` is stored in the table, False otherwise."""
        # LinkedList.search returns the position of the value, or -1.
        return self.internal_list[self.hash(d)].search(d) != -1

    def remove(self, d):
        """Remove one occurrence of ``d``; do nothing when it is absent."""
        bucket = self.internal_list[self.hash(d)]
        position = bucket.search(d)
        if position == -1:
            return

        # Only decrement size when an element was actually unlinked.
        bucket.remove(position)
        self.size -= 1

### Uniform hashing 
The hash function ensures that elements are uniformly distributed in the 
linked lists of the hash table. <br>
Spreading uniformly means that each element has an equal probability 
of being assigned to any of the linked lists in the hash table. 
* If the hash table has **n** elements and an internal array of size **m**
    * Then the average length of each linked list is going to be **n/m**

Which means :
* As elements are added to the hash table
    * The ratio **n/m** increases 
    * The performance decreases (search and remove take longer)
    
Solution: 
* Increase the size of the internal array whenever the ratio gets above
a **constant threshold**

In [None]:
class UniformHashTable:
    """Hash table of integers with separate chaining that grows on demand.

    Starts with 10 ``LinkedList`` buckets. After each ``add``, if ``size``
    exceeds ``threshold`` times the number of buckets, the bucket array is
    doubled and every stored element is re-hashed into the new array.
    """

    def __init__(self):
        # One (initially empty) linked list per slot of the internal array.
        self.internal_list = [LinkedList() for _ in range(10)]
        # Number of elements currently stored across all buckets.
        self.size = 0
        # Maximum elements-per-bucket ratio tolerated before resizing.
        self.threshold = 0.75

    def hash(self, d):
        """Return the bucket index for ``d`` given the current bucket count."""
        # The bucket *count* is needed here, not the list of buckets itself.
        length = len(self.internal_list)
        return ((d // length) + (d % length)) % length

    def add(self, d):
        """Store ``d`` at the head of its bucket, growing the table if needed."""
        i = self.hash(d)
        # LinkedList.insert takes (data, index) in that order.
        self.internal_list[i].insert(d, 0)
        self.size += 1

        if self.size > self.threshold * len(self.internal_list):
            self._resize_up()

    def _resize_up(self):
        """Double the number of buckets and re-hash every stored element."""
        old_array = self.internal_list
        self.internal_list = [LinkedList() for _ in range(2 * len(old_array))]

        # Drain each old bucket from its head: remove the element, compute
        # its hash against the new (larger) array, and insert it at the head
        # of the matching new bucket. ``size`` is unchanged by this move.
        for bucket in old_array:
            while bucket.length > 0:
                d = bucket.remove(0)
                new_hash_index = self.hash(d)
                self.internal_list[new_hash_index].insert(d, 0)
 
  



















In [None]:
class Node:
    """A single cell of a singly linked list."""

    def __init__(self, d, n):
        self.data = d  # value stored in this cell
        self.next = n  # following Node, or None for the last cell


class LinkedList:
    """Singly linked list that tracks its own length.

    ``head`` is the first ``Node`` (or None when empty) and ``length`` is
    the number of nodes currently linked.
    """

    def __init__(self):
        self.head = None
        self.length = 0

    def search(self, data):
        """Return the position of the first node equal to ``data``, or -1."""
        i = 0
        pointer = self.head

        while pointer is not None:
            if pointer.data == data:
                return i
            pointer = pointer.next
            i += 1

        return -1

    def append(self, data):
        """Add ``data`` as the new last element."""
        if self.head is None:
            self.head = Node(data, None)
        else:
            pointer = self.head

            # Stop ON the last node (the one whose next is None), not past
            # it, so there is still a node to attach the new one to.
            while pointer.next is not None:
                pointer = pointer.next

            pointer.next = Node(data, None)

        self.length += 1

    def insert(self, data, i):
        """Insert ``data`` so that it ends up at position ``i``.

        On an empty list ``data`` becomes the head whatever ``i`` is. An
        ``i`` past the end appends at the tail; a negative ``i`` behaves
        like ``i == 1`` (insertion right after the head).
        """
        if self.head is None:
            self.head = Node(data, None)
        elif i == 0:
            self.head = Node(data, self.head)
        else:
            pointer = self.head

            # Walk to the node just before position i (or to the tail if
            # the list is shorter): starting at the head, i - 1 steps.
            while i > 1 and pointer.next is not None:
                pointer = pointer.next
                i -= 1

            pointer.next = Node(data, pointer.next)

        self.length += 1

    def remove(self, i):
        """Remove the node at position ``i`` and return its data.

        Returns None, leaving the list untouched, when the list is empty or
        ``i`` does not name an existing position.
        """
        if self.head is None:
            return None

        if i == 0:
            removed = self.head.data
            self.head = self.head.next
            self.length -= 1
            return removed

        pointer = self.head

        # Walk to the node just before position i (or stop at the tail).
        while i > 1 and pointer.next is not None:
            pointer = pointer.next
            i -= 1

        # i == 1 alone is not enough: if the walk ended on the tail there
        # is no node after ``pointer`` to unlink.
        if i == 1 and pointer.next is not None:
            removed = pointer.next.data
            pointer.next = pointer.next.next
            self.length -= 1
            return removed

        return None

    def remove_value(self, data):
        """Remove the first node equal to ``data``.

        Returns True if a node was removed, False if ``data`` was not found.
        """
        position = self.search(data)
        if position == -1:
            return False

        self.remove(position)
        return True
