From 86070579cb7244aa811cce7e490b6cce16beabbd Mon Sep 17 00:00:00 2001 From: kmk028 Date: Sun, 1 Nov 2020 20:50:04 -0800 Subject: [PATCH 1/5] added hashing function --- hashtable/hashtable.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index 0205f0ba9..b7ac34726 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -21,7 +21,9 @@ class HashTable: """ def __init__(self, capacity): - # Your code here + self.capacity = capacity + self.storage = [None]*capacity + self.total = 0 def get_num_slots(self): @@ -34,7 +36,7 @@ def get_num_slots(self): Implement this. """ - # Your code here + return self.capacity def get_load_factor(self): @@ -57,12 +59,15 @@ def fnv1(self, key): def djb2(self, key): - """ - DJB2 hash, 32-bit - - Implement this, and/or FNV-1. - """ - # Your code here + hash = 5381 + for x in s: + # ord(x) simply returns the unicode rep of the + # character x + hash = (( hash << 5) + hash) + ord(x) + # Note to clamp the value so that the hash is + # related to the power of 2 + return hash & 0xFFFFFFFF + def hash_index(self, key): From 1c74b7d3fe43aaf7b95e60eda0a98a6e1b6395bc Mon Sep 17 00:00:00 2001 From: kmk028 Date: Mon, 2 Nov 2020 23:26:14 -0800 Subject: [PATCH 2/5] Added array implementation for get, put and delete --- hashtable/hashtable.py | 26 ++++++++++++++--------- hashtable/test_hashtable_no_collisions.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index b7ac34726..655be8a30 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -21,9 +21,9 @@ class HashTable: """ def __init__(self, capacity): + + self.table = [None] * capacity self.capacity = capacity - self.storage = [None]*capacity - self.total = 0 def get_num_slots(self): @@ -36,7 +36,7 @@ def get_num_slots(self): Implement this. """ - return self.capacity + return len(self.table) #self.capacity def get_load_factor(self): @@ -58,15 +58,15 @@ def fnv1(self, key): # Your code here - def djb2(self, key): + def djb2(self, key): #http://pythontutor.com/visualize.html#mode=display hash = 5381 - for x in s: + for x in key: # ord(x) simply returns the unicode rep of the # character x hash = (( hash << 5) + hash) + ord(x) # Note to clamp the value so that the hash is # related to the power of 2 - return hash & 0xFFFFFFFF + return hash & 0xFFFFFFFF @@ -77,6 +77,7 @@ def hash_index(self, key): """ #return self.fnv1(key) % self.capacity return self.djb2(key) % self.capacity + def put(self, key, value): """ @@ -86,7 +87,8 @@ def put(self, key, value): Implement this. """ - # Your code here + self.table[self.hash_index(key)] = value + def delete(self, key): @@ -97,7 +99,10 @@ def delete(self, key): Implement this. """ - # Your code here + val = self.table[self.hash_index(key)] + if val == None: + print('Value is already None') + self.table[self.hash_index(key)] = None def get(self, key): @@ -107,9 +112,10 @@ def get(self, key): Returns None if the key is not found. Implement this. - """ - # Your code here + """ + return self.table[self.hash_index(key)] + def resize(self, new_capacity): """ diff --git a/hashtable/test_hashtable_no_collisions.py b/hashtable/test_hashtable_no_collisions.py index a9b755b3d..2e0d2d518 100644 --- a/hashtable/test_hashtable_no_collisions.py +++ b/hashtable/test_hashtable_no_collisions.py @@ -11,7 +11,7 @@ class TestHashTable(unittest.TestCase): def test_hash_table_insertion_and_retrieval(self): - ht = HashTable(0x10000) + ht = HashTable(0x10) ht.put("key-0", "val-0") ht.put("key-1", "val-1") From dd4a9c864cf7ea1b732679e2c722a2255b86fb02 Mon Sep 17 00:00:00 2001 From: kmk028 Date: Tue, 3 Nov 2020 00:55:27 -0800 Subject: [PATCH 3/5] added LinkedList class and its helper functions --- hashtable/hashtable.py | 60 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index 655be8a30..18b4ed0eb 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -7,6 +7,66 @@ def __init__(self, key, value): self.value = value self.next = None +class Node: + def __init__(self,value): + self.value = value + self.next = None + + class LinkedList: + def __init__(self): + self.head = None + + def __repr__(self): # Walking through Linked list and printing it out whats within it. + currStr = "" + curr = self.head + while curr != None: + currStr += f'{str('curr.value')}-->' + curr = curr.next + return currStr + #Helper Methods + #O(n) where n is number of nodes + def find(self,value): + # return node with 'value' + curr = self.head + while curr != None: + if curr.value == value + return curr + curr = curr.next + return None + #O(n) where n is number of nodes + def delete(self,value): + # deletes node with given value + curr = self.head + + #Special case if we want to delete the head + if curr.value == value: + self.head = curr.next + curr.next = None + return curr + + prev = None + while curr != None: + if curr.value == value: + prev.next = curr.next + curr.next = None + return curr + else: + prev = curr + curr = curr.next + + return None + #O(1) where n is number of nodes + def insert_at_head(self,node): + node.next = self.head + self.head = node + #O(n) where n is number of nodes because of 'find' + def insert_at_head_or_overwrite(self,node): + #insert node at head or overwrite the node + existing_node = self.find(node.value) + if existing_node != None: + existing_node.value = node.value + else: + self.insert_at_head(node) # Hash table can't have fewer than this many slots MIN_CAPACITY = 8 From fc981a6a13bc9555eb1e8336f3a3da1d520e0822 Mon Sep 17 00:00:00 2001 From: kmk028 Date: Sun, 8 Nov 2020 01:03:59 -0800 Subject: [PATCH 4/5] put,get,delete and resize implemented with collision handling --- hashtable/hashtable.py | 169 ++++++++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 54 deletions(-) diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index 18b4ed0eb..d5904d855 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -7,71 +7,77 @@ def __init__(self, key, value): self.value = value self.next = None + def __eq__(self, other): + if isinstance(other,HashTableEntry): + return self.key == other.key + return False + class Node: def __init__(self,value): self.value = value self.next = None - class LinkedList: - def __init__(self): - self.head = None - - def __repr__(self): # Walking through Linked list and printing it out whats within it. - currStr = "" - curr = self.head - while curr != None: - currStr += f'{str('curr.value')}-->' - curr = curr.next - return currStr +class LinkedList: + def __init__(self): + self.head = None + + def __repr__(self): # Walking through Linked list and printing it out whats within it. + currStr = "" + curr = self.head + while curr != None: + currStr += f'{str(curr.value)}-->' + curr = curr.next + return currStr #Helper Methods #O(n) where n is number of nodes - def find(self,value): + def find(self,value): # return node with 'value' - curr = self.head - while curr != None: - if curr.value == value - return curr - curr = curr.next - return None + curr = self.head + while curr != None: + if curr.value == value: + return curr + curr = curr.next + return None #O(n) where n is number of nodes - def delete(self,value): + def delete(self,value): # deletes node with given value - curr = self.head + curr = self.head #Special case if we want to delete the head + if curr.value == value: + self.head = curr.next + curr.next = None + return curr + + prev = None + while curr != None: if curr.value == value: - self.head = curr.next + prev.next = curr.next curr.next = None return curr + else: + prev = curr + curr = curr.next - prev = None - while curr != None: - if curr.value == value: - prev.next = curr.next - curr.next = None - return curr - else: - prev = curr - curr = curr.next - - return None + return None #O(1) where n is number of nodes - def insert_at_head(self,node): - node.next = self.head - self.head = node + def insert_at_head(self,node): + node.next = self.head + self.head = node #O(n) where n is number of nodes because of 'find' - def insert_at_head_or_overwrite(self,node): + def insert_at_head_or_overwrite(self,node): #insert node at head or overwrite the node - existing_node = self.find(node.value) - if existing_node != None: - existing_node.value = node.value - else: - self.insert_at_head(node) + existing_node = self.find(node.value) + if existing_node != None: + existing_node.value = node.value + return False + else: + self.insert_at_head(node) + return True # Hash table can't have fewer than this many slots MIN_CAPACITY = 8 - class HashTable: """ A hash table that with `capacity` buckets @@ -84,6 +90,7 @@ def __init__(self, capacity): self.table = [None] * capacity self.capacity = capacity + self.num_elements = 0 def get_num_slots(self): @@ -105,7 +112,8 @@ def get_load_factor(self): Implement this. """ - # Your code here + # Load factor = num of elements in hash table/ num of slots + return self.num_elements/self.get_num_slots() def fnv1(self, key): @@ -140,6 +148,7 @@ def hash_index(self, key): def put(self, key, value): + """ Store the value with the given key. @@ -147,9 +156,22 @@ def put(self, key, value): Implement this. """ - self.table[self.hash_index(key)] = value - - + # self.table[self.hash_index(key)] = value --> Array implementation + hash_index = self.hash_index(key) + if self.table[hash_index] != None: + linked_list = self.table[hash_index] + did_add_new_node = linked_list.insert_at_head_or_overwrite(HashTableEntry(key,None)) + if did_add_new_node: + self.num_elements +=1 + + else: + linked_list = LinkedList() + linked_list.insert_at_head(HashTableEntry(key,None)) + self.table[hash_index] = linked_list + self.num_elements +=1 + + if self.get_load_factor() > 0.7: + self.resize(self.get_num_slots*2) def delete(self, key): """ @@ -159,13 +181,24 @@ def delete(self, key): Implement this. """ - val = self.table[self.hash_index(key)] - if val == None: - print('Value is already None') - self.table[self.hash_index(key)] = None - + # val = self.table[self.hash_index(key)] --> Array implementation + # if val == None: + # print('Value is already None') + # self.table[self.hash_index(key)] = None + hash_index = self.hash_index(key) + if self.table[hash_index] != None: + linked_list = self.table[hash_index] + did_delete_node = linked_list.delete(HashTableEntry(key,None)) + if did_delete_node != None: + self.num_elements -= 1 + if self.get_load_factor < 0.2: + self.resize(self.get_num_slots()/2) + + else: + print(f'This key does not exist!') def get(self, key): + """ Retrieve the value stored with the given key. @@ -174,8 +207,14 @@ def get(self, key): Implement this. """ - return self.table[self.hash_index(key)] - + # return self.table[self.hash_index(key)] --> Array implementation + hash_index = self.hash_index(key) + if self.table[hash_index] != None: + link_list = self.table[hash_index] + node = link_list.find(HashTableEntry(key,None)) + if node != None: + return node.value.value + return None def resize(self, new_capacity): """ @@ -184,7 +223,29 @@ def resize(self, new_capacity): Implement this. """ - # Your code here + old_table = self.table + new_table = [None]* int(new_capacity) + self.num_elements = 0 + + for element in old_table: + if element is None: + continue + curr_node = element.head + while curr_node != None: + temp = curr_node + curr_node.next = None # remove the reference since its moving to new table + hash_index = self.hash_index(curr_node.value.key) # this is where its going to be in new table + + if self.table[hash_index] != None: + self.table[hash_index].insert_at_head(curr_node) # Inseert at head if a linked list already exists in this new table + else: + linked_list = LinkedList() # if nothing is present at this index, create new node and add it at head. + linked_list.insert_at_head(curr_node) + self.table[hash_index] = linked_list + + curr_node = temp + self.num_elements += 1 + From 96c077b3d7a8c4059d73437393c05879d882f15a Mon Sep 17 00:00:00 2001 From: kmk028 Date: Mon, 9 Nov 2020 19:26:27 -0800 Subject: [PATCH 5/5] resize is not working --- hashtable/hashtable.py | 72 +++++++++++++---------- hashtable/test_hashtable_no_collisions.py | 2 + 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index d5904d855..24a26c884 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -8,15 +8,22 @@ def __init__(self, key, value): self.next = None def __eq__(self, other): - if isinstance(other,HashTableEntry): + if isinstance(other, HashTableEntry): return self.key == other.key return False -class Node: - def __init__(self,value): - self.value = value - self.next = None + def __repr__(self): + return f'HashTableEntry({self.key},{self.value}' + +# class Node: +# def __init__(self,value): +# self.value = value +# self.next = None + +# def __repr__(self): +# return f'Node({self.value})' + class LinkedList: def __init__(self): self.head = None @@ -25,18 +32,20 @@ def __repr__(self): # Walking through Linked list and printing it out whats with currStr = "" curr = self.head while curr != None: - currStr += f'{str(curr.value)}-->' + currStr += f'{str(curr)}' curr = curr.next return currStr #Helper Methods #O(n) where n is number of nodes - def find(self,value): - # return node with 'value' - curr = self.head - while curr != None: - if curr.value == value: - return curr - curr = curr.next + def find(self, value): + cur = self.head + + while cur is not None: + if cur == value: + return cur + + cur = cur.next + return None #O(n) where n is number of nodes def delete(self,value): @@ -136,8 +145,6 @@ def djb2(self, key): #http://pythontutor.com/visualize.html#mode=display # related to the power of 2 return hash & 0xFFFFFFFF - - def hash_index(self, key): """ Take an arbitrary key and return a valid integer index @@ -146,7 +153,6 @@ def hash_index(self, key): #return self.fnv1(key) % self.capacity return self.djb2(key) % self.capacity - def put(self, key, value): """ @@ -160,18 +166,22 @@ def put(self, key, value): hash_index = self.hash_index(key) if self.table[hash_index] != None: linked_list = self.table[hash_index] - did_add_new_node = linked_list.insert_at_head_or_overwrite(HashTableEntry(key,None)) + did_add_new_node = linked_list.insert_at_head_or_overwrite(HashTableEntry(key,value)) + if did_add_new_node: self.num_elements +=1 else: linked_list = LinkedList() - linked_list.insert_at_head(HashTableEntry(key,None)) + linked_list.insert_at_head(HashTableEntry(key,value)) + self.table[hash_index] = linked_list self.num_elements +=1 if self.get_load_factor() > 0.7: - self.resize(self.get_num_slots*2) + self.resize(self.get_num_slots()*2) + + def delete(self, key): """ @@ -191,29 +201,29 @@ def delete(self, key): did_delete_node = linked_list.delete(HashTableEntry(key,None)) if did_delete_node != None: self.num_elements -= 1 - if self.get_load_factor < 0.2: + if self.get_load_factor() < 0.2: + print(f'load factor is {self.get_load_factor()}') + print(f'self.get_num_slots()/2 is {self.get_num_slots()/2}') self.resize(self.get_num_slots()/2) else: print(f'This key does not exist!') def get(self, key): - """ Retrieve the value stored with the given key. Returns None if the key is not found. Implement this. - """ - # return self.table[self.hash_index(key)] --> Array implementation hash_index = self.hash_index(key) if self.table[hash_index] != None: - link_list = self.table[hash_index] - node = link_list.find(HashTableEntry(key,None)) + linked_list = self.table[hash_index] + + node = linked_list.find(HashTableEntry(key,None)) if node != None: - return node.value.value + return node.value return None def resize(self, new_capacity): @@ -224,17 +234,20 @@ def resize(self, new_capacity): Implement this. """ old_table = self.table - new_table = [None]* int(new_capacity) + self.table = [None]* int(new_capacity) self.num_elements = 0 for element in old_table: if element is None: continue curr_node = element.head + while curr_node != None: - temp = curr_node + temp = curr_node.next curr_node.next = None # remove the reference since its moving to new table + print(curr_node.key) hash_index = self.hash_index(curr_node.value.key) # this is where its going to be in new table + print (hash_index) if self.table[hash_index] != None: self.table[hash_index].insert_at_head(curr_node) # Inseert at head if a linked list already exists in this new table @@ -245,9 +258,6 @@ def resize(self, new_capacity): curr_node = temp self.num_elements += 1 - - - if __name__ == "__main__": ht = HashTable(8) diff --git a/hashtable/test_hashtable_no_collisions.py b/hashtable/test_hashtable_no_collisions.py index 2e0d2d518..ec87d7883 100644 --- a/hashtable/test_hashtable_no_collisions.py +++ b/hashtable/test_hashtable_no_collisions.py @@ -17,7 +17,9 @@ def test_hash_table_insertion_and_retrieval(self): ht.put("key-1", "val-1") ht.put("key-2", "val-2") + return_value = ht.get("key-0") + print (return_value) self.assertTrue(return_value == "val-0") return_value = ht.get("key-1") self.assertTrue(return_value == "val-1")