# hashtable/class_example.py (added as a new file) -- lecture notes on hashing.

# lorem ipsum
my_arr = [
    "Lorem",
    "ipsum",
    "dolor",
    "sit",
    "amet",
    "consectetur",
    "adipiscing",
    "elit",
]

# Task: search for an element, for example "consectetur".
#
# Linear scan -- O(n):
#     for word in my_arr:
#         if word == 'consectetur':
#             return True
#
#     if 'elit' in my_arr:
#         return True
#
# O(log n): sort the array, then run binary search on it.
#
# What if we could find the index of the element in O(1) time?
# Then it takes one more step to access the element: my_arr[5].
# That would give us O(1) search.
#
# So we would like a function that returns the index:
# a hash function (a.k.a. hashing function).
#
# Open question: do you have to track where you've put things in the
# underlying array?

# Hash functions
# --------------
# Write a function that takes a string and turns it into a number:
# hash the string with a hashing function... and you get back a hash.

my_arr = [None] * 8

# What we want from a hash function:
#   * it's fast
#   * deterministic
#   * can't get the input from the output


def len_hash(s):
    """Return ``len(s)``; this example uses the length of the word as the index."""
    return len(s)


# Store: use the hashing function to put the word "hello" into the array.
hello_number = len_hash("hello")  # hashing function gives us an index
my_arr[hello_number] = "hello"


# ... some time passes ...
# Look up: hash the same word again to find the same index.
hello_number = len_hash("hello")
my_arr[hello_number]  # pull out the word we want

# What about words of the same length?
# "world" has the same length as "hello", so it is written to the same slot.
world_number = len_hash("world")
my_arr[world_number] = "world"


world_number = len_hash("world")
my_arr[world_number]

# What about long words?
# The raw hash can be larger than the array, so wrap it with modulo first.
long_word = "supercalifragilisticexpialidocious"
long_word_hash = len_hash(long_word)

long_word_idx = long_word_hash % len(my_arr)

my_arr[long_word_idx] = long_word

## how to fix this?
### dynamic array?
### use modulo, aka 'mod the number'


# Recap
# -----
# The problem with arrays: search is slow.
# How to get faster?
# To reach O(1), make a magic function that returns the index of the target
# word in O(1) time.
#   - made a simple hash function
#   - make the hash function and the array play nice together

# Let's improve our hash function by making it more unique.

## add up the letters
### assign a number to every letter
### ASCII has already done this


def add_hash(s):
    """Return the sum of the code points (``ord``) of every character in ``s``."""
    return sum(ord(character) for character in s)


### won't work for anagrams!
#### dad vs add

# UTF-8, ASCII on steroids
# encode
def utf8_hash(s):
    """Return the sum of the byte values of ``s.encode()``."""
    return sum(s.encode())


# we can do math on the bytes of the string!

my_arr = [None] * 10000


def put(key, value):
    """Store ``value`` in ``my_arr`` at the slot derived from ``key``.

    The key is hashed with ``utf8_hash`` and reduced modulo ``len(my_arr)``;
    whatever was in that slot before is overwritten.
    """
    my_arr[utf8_hash(key) % len(my_arr)] = value


put("hello", "hello world")

# what is the time complexity here?
## if you measure by the length of the key, O(n)
## if you measure by the number of slots / length of array, then it's O(1)


def get(s):
    """Return whatever is stored in the slot that the key ``s`` maps to."""
    # string --> number --> index --> element at that index
    return my_arr[utf8_hash(s) % len(my_arr)]


get("hello")  ## get the key


# Delete: find the value, then set to None

# Put
## 1. Hash our string/key, get out a number
## 2. Take this number and modulo it by the length of the array
## 3. This new number can be used as an index, so put the value at that index in our array

# Get
## 1. Hash our string/key, string --> number
## 2. Mod this number by length of array
## 3. Use this modded number / index to get the value there


## Common use-cases?
# --- hashtable/class_example.py: closing notes of the new file ---

### hashing functions: encryption
### Fast O(1) lookup of values using a key to find it

## Easy to think about time complexity for arrays vs objects/dictionaries

# if x in my_data_structure:  ## O(n) for an array, runs get() --> O(1) for a hash table

# look up user profile from username, 1 billion users


# Couldn't we end up with the wrong modulo if we've increased the size of the
# array between put and get?
# Increasing the size of the array which we're using with our hash table?
# Solving collisions??
### TO BE CONTINUED....


# --- hashtable/hashtable.py: HashTable methods touched by this change ---
# NOTE: `HashTableEntry`, `HashTable.fnv1` and `HashTable.hash_index` are part
# of hashtable.py, but their bodies lie outside the hunks shown here. They are
# not reproduced below and must be kept as they are in the file.
class HashTable:
    """Hash table that resolves collisions by chaining entries per slot.

    Attributes:
        capacity: Number of slots in ``data``.
        data: List of ``capacity`` slots; each slot is ``None`` or the head of
            a singly linked chain of entries (``key``, ``value``, ``next``).
        item_count: Number of key/value pairs currently stored.
    """

    def __init__(self, capacity):
        """Create an empty table with ``capacity`` slots."""
        self.capacity = capacity
        self.data = [None] * capacity
        self.item_count = 0

    def get_num_slots(self):
        """Return the number of slots in the underlying list.

        This is the size of the slot list, not the number of stored items.
        """
        # The original called len() on the integer capacity, which raises
        # TypeError; the slot list is what has a length.
        return len(self.data)

    def get_load_factor(self):
        """Return the load factor: stored items divided by number of slots."""
        # The original returned the raw capacity, so every insert looked
        # "overloaded" and triggered a resize.
        return self.item_count / self.get_num_slots()

    def djb2(self, key):
        """Return the DJB2 hash of the string ``key`` as an int.

        Starts from 5381 and, per character, multiplies by 33 and adds the
        character's code point.
        """
        # NOTE(review): the result is not truncated to a fixed width; confirm
        # whether callers expect a 32-bit value.
        hash_value = 5381
        for char in key:
            hash_value = (hash_value * 33) + ord(char)
        return hash_value

    def put(self, key, value):
        """Store ``value`` under ``key``.

        If ``key`` is already present its value is overwritten. Otherwise a
        new entry is inserted at the head of the slot's chain, and the table
        doubles in size when the load factor exceeds 0.7.
        """
        index = self.hash_index(key)

        # Overwrite in place when the key already exists in this chain.
        node = self.data[index]
        while node is not None:
            if node.key == key:
                node.value = value
                return
            node = node.next

        # New key: link a fresh entry in front of the existing chain.
        entry = HashTableEntry(key, value)
        entry.next = self.data[index]
        self.data[index] = entry
        self.item_count += 1

        if self.get_load_factor() > 0.7:
            self.resize(self.capacity * 2)

    def delete(self, key):
        """Remove ``key`` and its value from the table.

        Prints a warning when the key is not present. After a successful
        removal the table is halved when the load factor drops below 0.2,
        but never below one slot.
        """
        index = self.hash_index(key)

        previous = None
        node = self.data[index]
        while node is not None:
            if node.key == key:
                # Unlink the node, whether it is the chain head or not.
                if previous is None:
                    self.data[index] = node.next
                else:
                    previous.next = node.next
                self.item_count -= 1

                if self.get_load_factor() < 0.2:
                    # Clamp so the table can never shrink to zero slots.
                    shrunk = max(self.capacity // 2, 1)
                    if shrunk != self.capacity:
                        self.resize(shrunk)
                return
            previous, node = node, node.next

        print('No entry with the provided key.')

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent."""
        node = self.data[self.hash_index(key)]
        while node is not None:
            if node.key == key:
                return node.value
            node = node.next
        return None

    def resize(self, new_capacity):
        """Change the number of slots to ``new_capacity`` and rehash all entries.

        Existing entry objects are relinked into the new slot list, so
        ``item_count`` is unchanged and the table ends up with exactly
        ``new_capacity`` slots.

        Raises:
            ValueError: If ``new_capacity`` is less than 1.
        """
        if new_capacity < 1:
            raise ValueError("new_capacity must be at least 1")

        old_slots = self.data
        # Update both size fields before rehashing so hash_index sees the
        # new size whichever of them it reads.
        self.capacity = new_capacity
        self.data = [None] * new_capacity

        for head in old_slots:
            node = head
            while node is not None:
                following = node.next  # save before relinking overwrites it
                index = self.hash_index(node.key)
                node.next = self.data[index]
                self.data[index] = node
                node = following