## Hashing, Hash Table and HashMap

In [369]:
def simple_hash(inp, seed=1):
    """
    Hash a non-empty string into a non-negative (possibly very large) integer.

    Each character's code point (``ord``) is multiplied by ``seed ** 2``. The
    decimal digits of those products are concatenated, in order, into one big
    number, and the sum of the products is then added to it. The original
    author's rationale for adding the sum: without it, words ending in the
    same character (e.g. "Steve" and "Bruce") would end up at the same
    position.

    Prefer a prime seed; a suitable magnitude depends on the use case.

    Args:
        inp: Non-empty string to hash.
        seed: Positive integer seed.

    Returns:
        The hash value as an ``int``.

    Raises:
        ValueError: If ``inp`` is empty, ``seed`` is not positive, or the
            per-character product is not a whole number (e.g. float seed).
    """
    if not inp or not seed > 0:
        raise ValueError("Invalid input/seed")

    multiplier = seed ** 2
    concatenated = 0
    total = 0
    for char in inp:
        digits = str(ord(char) * multiplier)
        if not digits.isdigit():
            raise ValueError("Invalid input/seed")
        term = int(digits)
        # Append the digits arithmetically instead of int("".join(...)):
        # CPython 3.11+ refuses str -> int conversions longer than 4300
        # digits by default, which long inputs exceed easily.
        concatenated = concatenated * 10 ** len(digits) + term
        total += term
    return concatenated + total

In [370]:
simple_hash("My name is Binay")

77121321109710910132105115326610511098551

In [371]:
simple_hash("My name is Binay", seed=5)

1925302580027502425272525258002625287580016502625275024288775

In [372]:
simple_hash("My name is Silky", seed=5)

1925302580027502425272525258002625287580020752625270026789400

In [373]:
simple_hash("a", seed=30)

174600

In [374]:
class LinkedNode:
    """
    Singly linked list node holding one key/value pair.

    Nodes are chained through ``next`` so that several entries can share one
    slot when their hashes collide.
    """
    def __init__(self, key, val=None, _next=None):
        self.key, self.val, self.next = key, val, _next

    def __repr__(self):
        return "Data: {key}: {val}".format(key=self.key, val=self.val)

def fetch_key(node, key):
    """
    Walk the linked list starting at ``node`` and return the value for ``key``.

    Keys are compared with ``==`` rather than ``is``: two equal strings or
    large integers created at runtime are usually distinct objects, so an
    identity check would miss them.

    Args:
        node: Head of the list; every node exposes ``key``, ``val``, ``next``.
        key: Key to look for.

    Returns:
        The ``val`` of the first node whose key equals ``key``.

    Raises:
        KeyError: If no node in the list holds ``key`` (``KeyError`` is a
            subclass of ``Exception``, so broad handlers keep working).
    """
    current = node
    # Iterative walk: long chains cannot hit the recursion limit.
    while current is not None:
        if current.key == key:
            return current.val
        current = current.next
    raise KeyError("Item not found")

def get_all_data(node, container):
    """
    Append the (key, value) pair of every node in the list to ``container``.

    Starts at ``node`` and follows ``next`` until a node has no successor.
    The same ``container`` object is returned for convenience.
    """
    current = node
    while True:
        container.append((current.key, current.val))
        if not current.next:
            break
        current = current.next
    return container
            

In [375]:
# Seed that Hashmap passes to simple_hash when computing bucket positions.
seed = 5

class Hashmap:
    """
    A simple hash map backed by a list of buckets with separate chaining.

    The table starts with 10 buckets. Keys are stringified and hashed with
    ``simple_hash`` using the module-level ``seed``; keys that land in the
    same bucket are chained through ``LinkedNode.next``. The table doubles in
    size when adding a new key would bring the item count up to
    ``fill_factor`` (80% by default) of the bucket count.
    """
    def __init__(self, fill_factor=0.8):
        self.length = 10
        self.container = [None] * self.length
        self.items_count = 0
        self._fill_factor = fill_factor

    def _position(self, key):
        """Return the bucket index of ``key`` for the current table size."""
        return simple_hash(str(key), seed=seed) % self.length

    def _find_node(self, key):
        """Return the node that stores ``key``, or None when it is absent."""
        node = self.container[self._position(key)]
        while node is not None:
            if node.key == key:
                return node
            node = node.next
        return None

    def _link(self, key, value):
        """Attach a node for a key known to be absent (no resize, no count)."""
        new_node = LinkedNode(key, value)
        position = self._position(key)
        head = self.container[position]
        if head is None:
            self.container[position] = new_node
            return
        # Append at the tail so nodes already chained here are preserved.
        tail = head
        while tail.next is not None:
            tail = tail.next
        tail.next = new_node

    def add(self, key, value=None):
        """
        Store ``value`` under ``key``, replacing the value of an existing key.

        Updating an existing key neither changes the item count nor triggers
        a resize.
        """
        existing = self._find_node(key)
        if existing is not None:
            existing.val = value
            return

        # Resize first: the bucket index depends on the table length, so it
        # must only be computed once the final length is known.
        if self.items_count + 1 >= self.length * self._fill_factor:
            self.resize()

        self._link(key, value)
        self.items_count += 1

    def get(self, key):
        """
        Return the value stored under ``key``.

        Raises:
            KeyError: If ``key`` is not present.
        """
        node = self._find_node(key)
        if node is None:
            raise KeyError("Item not found")
        return node.val

    def resize(self):
        """Double the number of buckets and redistribute every stored item."""
        items = []
        for head in self.container:
            node = head
            while node is not None:
                items.append((node.key, node.val))
                node = node.next

        self.length *= 2
        self.container = [None] * self.length
        self.items_count = len(items)

        for key, value in items:
            self._link(key, value)

        

In [376]:
_map = Hashmap()
# _map.container

In [377]:
# Store a value, then add the same key again: the second call replaces the
# stored value, so the lookup in the next cell returns 'Doe'.
_map.add('name', 'John')
# print(len(_map.container))
_map.add('name', 'Doe')
# print(len(_map.container))
# A different key gets its own entry.
_map.add('job', 'engineer')
# print(len(_map.container))

In [378]:
_map.get('name')

'Doe'

## Consistent Hashing

In [379]:
class HashedCell:
    """
    One slot of the hash ring.

    A cell knows its ``index`` and may hold a ``server`` and a ``data`` node;
    the data node is a ``LinkedNode``, so colliding entries can be chained.
    """
    def __init__(self, index, server=None):
        self.server = server
        self.data = None
        self.index = index

    def add_data(self, key=None, val=None):
        """Replace the cell's data with a fresh node for ``key``/``val``."""
        node = LinkedNode(key, val)
        self.data = node

    def __repr__(self):
        server_name = self.server.server_name if self.server else None
        if self.data:
            data_key = self.data.key
        else:
            data_key = None
        if self.data:
            data_val = self.data.val
        else:
            data_val = None
        template = "Index: {0}, Server: {1}, Data: {2}: {3}"
        return template.format(self.index, server_name, data_key, data_val)
    

class Server:
    """
    Record describing one server on the ring.

    Tracks the server's name, the ring positions it is attached to and the
    queue of data indexes it has been asked to process.
    """
    def __init__(self, name):
        self.server_name = name
        self.process_queue = []
        self.attached_to = []

    def __repr__(self):
        template = "Server: {0}, Attached at: {1}, To Process: {2}"
        positions = str(self.attached_to)
        pending = str(self.process_queue)
        return template.format(self.server_name, positions, pending)

    def process(self, data_index):
        """Queue ``data_index`` for processing by this server."""
        queue = self.process_queue
        queue.append(data_index)
    

In [380]:
# Seed values.
# ConsistentHash.add_servers hashes every server name once per server seed,
# giving each server four positions on the ring.
server_seed = 5
server_seed_2 = 8
server_seed_3 = 7
server_seed_4 = 9
# ConsistentHash.add_data hashes data keys with this seed.
data_seed = 13

class ConsistentHash:
    """A 360-degree circular (ring) implementation of load distribution.

    The ring is a list of 360 ``HashedCell`` slots. Servers are attached at
    four slots each (one per server seed); data is attached at one slot and
    is handled by the server in that slot or, failing that, by the next
    server found when walking the ring forward.

    Supports server addition, server removal, data addition and assignment
    of data to servers.
    """
    def __init__(self):
        self.total_angles = 360
        self.container = [HashedCell(i) for i in range(self.total_angles)]
        self.servers = []

    def __repr__(self):
        # One "(index, data, server)" line per cell of *this* ring.
        return '\n'.join(str((c.index, c.data, c.server)) for c in self.container)

    def add_servers(self, *args):
        """
        Attach each given server at four ring positions.

        The positions come from hashing the server's name with the four
        module-level server seeds. A later server that lands on an occupied
        position takes that position over.
        """
        # Seeds are read at call time so module-level changes are honoured.
        seeds = (server_seed, server_seed_2, server_seed_3, server_seed_4)
        for server in args:
            name = str(server)
            positions = [simple_hash(name, seed=s) % self.total_angles for s in seeds]
            server_obj = Server(server)
            server_obj.attached_to.extend(positions)
            self.servers.append(server_obj)
            for position in positions:
                self.container[position].server = server_obj
            print("Server: {0} attached on positions {1}, {2}, {3}, {4}".format(server_obj.server_name, *positions))

    def stop_server(self, server_name):
        """
        Remove every server called ``server_name`` from the ring.

        Raises:
            Exception: If no server with that name is registered.
        """
        stopped = [s for s in self.servers if s.server_name == server_name]
        if not stopped:
            raise Exception("Server Not Found")

        for server in stopped:
            for location in server.attached_to:
                cell = self.container[location]
                # Only free the cell if it still belongs to the stopped
                # server; another server may have taken the position over.
                if cell.server is not None and cell.server.server_name == server_name:
                    cell.server = None
        self.servers = [s for s in self.servers if s.server_name != server_name]
        # self.process()
        print("Server: {} stopped".format(server_name))

    def add_data(self, key, value=None):
        """
        Attach ``key``/``value`` to the ring position derived from ``key``.

        Re-adding an existing key updates its value in place; a different key
        landing on the same position is appended to that cell's chain.
        """
        _hash = simple_hash(str(key), seed=data_seed)
        position = _hash % self.total_angles
        cell = self.container[position]

        node = cell.data
        if node is None:
            cell.add_data(key, value)
        else:
            # Walk the chain: update a matching key, otherwise append at the
            # tail so entries already chained here are not lost.
            while True:
                if node.key == key:
                    node.val = value
                    break
                if node.next is None:
                    node.next = LinkedNode(key, value)
                    break
                node = node.next
        print("Data: {0}: {1} attached on position {2}".format(key, value, position))

    def find_next_server(self, index):
        """
        Find the next cell after ``index`` (walking forward, wrapping around
        the ring) that has a server attached.

        Returns:
            The index of that cell, or None when no other cell has a server.
        """
        for offset in range(1, self.total_angles):
            candidate = (index + offset) % self.total_angles
            if self.container[candidate].server:
                return candidate
        return None

    def process(self):
        """
        Assign every data-holding cell to a server.

        Each server's queue is rebuilt from scratch. A cell that has its own
        server is handled by it; otherwise the next server on the ring takes
        it. Only the first entry of a cell's chain is named in the printed
        message; the queue stores the cell index.

        Raises:
            RuntimeError: If data exists but no server is attached anywhere.
        """
        for server in self.servers:
            server.process_queue = []

        for item in self.container:
            # Presence of a data node decides, not the truthiness of its key,
            # so keys such as 0 or "" are processed as well.
            if not item.data:
                continue

            if item.server:
                item.server.process(item.index)
                print("Data: {0}: {1} to be handled by {2} at {3}".format(item.data.key, item.data.val, item.server.server_name, item.index))
                continue

            server_index = self.find_next_server(item.index)
            if server_index is None:
                raise RuntimeError("No server available to handle data at index {0}".format(item.index))
            owner = self.container[server_index]
            owner.server.process(item.index)
            print("Data: {0}: {1} to be handled by {2} at index {3}".format(item.data.key, item.data.val, owner.server.server_name, owner.index))
                

In [381]:
ch = ConsistentHash()

In [382]:
ch.add_servers('server1', 'server2', 'server3', 'server4')

Server: server1 attached on positions 185, 344, 89, 81
Server: server2 attached on positions 235, 112, 187, 243
Server: server3 attached on positions 285, 240, 285, 45
Server: server4 attached on positions 335, 8, 23, 207


In [383]:
# Attach keys of different types (small int, large int, strings).
# add_data stringifies each key before hashing it; no value is passed, so
# every entry is stored with the default value None.
ch.add_data(5)
ch.add_data(15)
ch.add_data(30000000000000000000)
ch.add_data('This hash is great')
ch.add_data('Fuck')
ch.add_data('Google')

Data: 5: None attached on position 274
Data: 15: None attached on position 195
Data: 30000000000000000000: None attached on position 99
Data: This hash is great: None attached on position 239
Data: Fuck: None attached on position 300
Data: Google: None attached on position 154


In [384]:
ch.servers

[Server: server1, Attached at: [185, 344, 89, 81], To Process: [],
 Server: server2, Attached at: [235, 112, 187, 243], To Process: [],
 Server: server3, Attached at: [285, 240, 285, 45], To Process: [],
 Server: server4, Attached at: [335, 8, 23, 207], To Process: []]

In [385]:
ch.process()

Data: 30000000000000000000: None to be handled by server2 at index 112
Data: Google: None to be handled by server1 at index 185
Data: 15: None to be handled by server4 at index 207
Data: This hash is great: None to be handled by server3 at index 240
Data: 5: None to be handled by server3 at index 285
Data: Fuck: None to be handled by server4 at index 335


In [386]:
ch.servers

[Server: server1, Attached at: [185, 344, 89, 81], To Process: [154],
 Server: server2, Attached at: [235, 112, 187, 243], To Process: [99],
 Server: server3, Attached at: [285, 240, 285, 45], To Process: [239, 274],
 Server: server4, Attached at: [335, 8, 23, 207], To Process: [195, 300]]

In [387]:
ch

(0, None, None)
(1, None, None)
(2, None, None)
(3, None, None)
(4, None, None)
(5, None, None)
(6, None, None)
(7, None, None)
(8, None, Server: server4, Attached at: [335, 8, 23, 207], To Process: [195, 300])
(9, None, None)
(10, None, None)
(11, None, None)
(12, None, None)
(13, None, None)
(14, None, None)
(15, None, None)
(16, None, None)
(17, None, None)
(18, None, None)
(19, None, None)
(20, None, None)
(21, None, None)
(22, None, None)
(23, None, Server: server4, Attached at: [335, 8, 23, 207], To Process: [195, 300])
(24, None, None)
(25, None, None)
(26, None, None)
(27, None, None)
(28, None, None)
(29, None, None)
(30, None, None)
(31, None, None)
(32, None, None)
(33, None, None)
(34, None, None)
(35, None, None)
(36, None, None)
(37, None, None)
(38, None, None)
(39, None, None)
(40, None, None)
(41, None, None)
(42, None, None)
(43, None, None)
(44, None, None)
(45, None, Server: server3, Attached at: [285, 240, 285, 45], To Process: [239, 274])
(46, None, None)
(47, Non

In [388]:
ch.stop_server('server4')

Server: server4 stopped


In [389]:
#ch.stop_server('server5')

In [390]:
ch.servers

[Server: server1, Attached at: [185, 344, 89, 81], To Process: [154],
 Server: server2, Attached at: [235, 112, 187, 243], To Process: [99],
 Server: server3, Attached at: [285, 240, 285, 45], To Process: [239, 274]]

In [391]:
ch.process()

Data: 30000000000000000000: None to be handled by server2 at index 112
Data: Google: None to be handled by server1 at index 185
Data: 15: None to be handled by server2 at index 235
Data: This hash is great: None to be handled by server3 at index 240
Data: 5: None to be handled by server3 at index 285
Data: Fuck: None to be handled by server1 at index 344


In [392]:
ch.servers

[Server: server1, Attached at: [185, 344, 89, 81], To Process: [154, 300],
 Server: server2, Attached at: [235, 112, 187, 243], To Process: [99, 195],
 Server: server3, Attached at: [285, 240, 285, 45], To Process: [239, 274]]

In [393]:
ch

(0, None, None)
(1, None, None)
(2, None, None)
(3, None, None)
(4, None, None)
(5, None, None)
(6, None, None)
(7, None, None)
(8, None, None)
(9, None, None)
(10, None, None)
(11, None, None)
(12, None, None)
(13, None, None)
(14, None, None)
(15, None, None)
(16, None, None)
(17, None, None)
(18, None, None)
(19, None, None)
(20, None, None)
(21, None, None)
(22, None, None)
(23, None, None)
(24, None, None)
(25, None, None)
(26, None, None)
(27, None, None)
(28, None, None)
(29, None, None)
(30, None, None)
(31, None, None)
(32, None, None)
(33, None, None)
(34, None, None)
(35, None, None)
(36, None, None)
(37, None, None)
(38, None, None)
(39, None, None)
(40, None, None)
(41, None, None)
(42, None, None)
(43, None, None)
(44, None, None)
(45, None, Server: server3, Attached at: [285, 240, 285, 45], To Process: [239, 274])
(46, None, None)
(47, None, None)
(48, None, None)
(49, None, None)
(50, None, None)
(51, None, None)
(52, None, None)
(53, None, None)
(54, None, None)
(55, N

## Bloom Filters

In [394]:
import numpy as np

In [395]:
# Seeds for the Bloom filter hash functions.
# Use prime numbers so the seeds share no common factors with each other.
# Keep them above 100 to get larger hash values.
seed1 = 211
seed2 = 317
seed3 = 359
# seed4 is not used by BloomStore._hash, which hashes with seed1..seed3 only.
seed4 = 281

class BloomStore:
    """Probabilistic availability filter, a.k.a. Bloom filter.

    Every key is hashed separately with three different seeds and the three
    resulting positions are set in a bitmap. A lookup recomputes the same
    positions: if any of them is unset, the key has not been added; if all
    are set, the key has probably been added.

    Args:
        size: Number of slots in the bitmap (default 64000).

    Raises:
        ValueError: If ``size`` is not positive.
    """
    def __init__(self, size=64000):
        if size <= 0:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.container = np.zeros(self.size, dtype=int)

    def _hash(self, key):
        """Return the three bitmap positions of ``key`` as a tuple."""
        text = str(key)
        # Seeds are read at call time so module-level changes are honoured.
        return tuple(
            simple_hash(text, seed=s) % self.size
            for s in (seed1, seed2, seed3)
        )

    def add(self, key, val=None):
        """Mark ``key`` as seen. ``val`` is accepted but not stored here."""
        for position in self._hash(key):
            self.container[position] = 1
        # Add Data to db

    def is_available(self, key):
        """Return True when all positions of ``key`` are set, else False."""
        # On True: go fetch and return data
        return all(self.container[position] for position in self._hash(key))
    
    
        

In [396]:
bf = BloomStore()

In [397]:
# Register five names in the filter; the cells below query it.
bf.add('Binay')
bf.add('Tony')
bf.add('Steve')
bf.add('Bruce')
bf.add('Odinson')

In [398]:
bf.is_available('Bruce')

True

In [399]:
bf.is_available('Bruce Wayne')

False

In [400]:
bf.is_available('Hulk')

False

In [401]:
bf.is_available('Thor')

False