diff --git a/applications/expensive_seq/expensive_seq.py b/applications/expensive_seq/expensive_seq.py index 5c82b8453..0e305259c 100644 --- a/applications/expensive_seq/expensive_seq.py +++ b/applications/expensive_seq/expensive_seq.py @@ -1,9 +1,13 @@ -# Your code here +cache = {} def expensive_seq(x, y, z): - # Your code here - + if (x, y, z) not in cache: + if x <= 0: + cache[(x, y, z)] = (y + z) + else: + cache[(x, y, z)] = expensive_seq(x-1, y+1, z) + expensive_seq(x-2, y+2, z*2) + expensive_seq(x-3, y+3, z*3) + return cache[(x, y, z)] if __name__ == "__main__": diff --git a/applications/histo/README.md b/applications/histo/README.md index 428a1b267..85b94fae3 100644 --- a/applications/histo/README.md +++ b/applications/histo/README.md @@ -68,13 +68,17 @@ bow ###### ## Hints Items: `.vgrzf()` zrgubq ba n qvpgvbanel zvtug or hfrshy. +# `.items()` method on a dictionary might be useful Sorting: vg'f cbffvoyr sbe `.fbeg()` gb fbeg ba zhygvcyr xrlf ng bapr. +# it's possible for `.sort()` to sort on multiple keys at once. Sorting: artngvirf zvtug uryc jurer `erirefr` jba'g. +# negatives might help where `reverse` won't. Printing: lbh pna cevag n inevnoyr svryq jvqgu va na s-fgevat jvgu arfgrq oenprf, yvxr fb `{k:{l}}` +# you can print a variable field width in an f-string with nested braces, like so `{x:{y}}` (The hints are encrypted with ROT13. Google for `rot13 decoder` to see them.) \ No newline at end of file diff --git a/applications/histo/histo.py b/applications/histo/histo.py index 6014a8e13..d4023c409 100644 --- a/applications/histo/histo.py +++ b/applications/histo/histo.py @@ -1,2 +1,41 @@ -# Your code here +def histo(filename): + # open file + with open(filename, 'r') as f: + # set string text to variable + s = f.read() + # list of characters to ignore + characters_to_ignore = ['"', ":", ";" , ".", "-", "+", "=", "/", "[", "]", + "{", "}", "(", ")", ",", "*", "^", "&", "|", '\\'] + # for each character in input string + for character in s: + # if it's in the ignored characters + if character in characters_to_ignore: + # replace the value with whitepsace + s = s.replace(character, ' ') + # now, ready to make string a list by splitting string on whitespace + words = s.split() + # make all words in list lowercase + words = [word.lower() for word in words] + # create an empty dict + word_dict = {} + # for each word in our list of words + for word in words: + # if not already in dict + if word not in word_dict: + # add new key/value pair + word_dict[word] = 1 + # if word in dict + else: + # increase the counter + word_dict[word] += 1 + # sort the list of tuples by the second key first (counts, descending) + # then by the first key (alphabetical order) + for tup in sorted(word_dict.items(), key=lambda x: (-x[1], x[0])): + # print our word and it's associated word count as a histogram + print(tup[0].ljust(20) + " " + ("#" * tup[1])) + + +if __name__ == '__main__': + + histo('robin.txt') diff --git a/applications/lookup_table/lookup_table.py b/applications/lookup_table/lookup_table.py index 05b7d37fa..a65589fde 100644 --- a/applications/lookup_table/lookup_table.py +++ b/applications/lookup_table/lookup_table.py @@ -1,26 +1,21 @@ -# Your code here +# expensive calculation on the fly +import math -def slowfun_too_slow(x, y): - v = math.pow(x, y) - v = math.factorial(v) - v //= (x + y) - v %= 982451653 +lookup_table = {} - return v +def inverse_root(n): + return 1/math.sqrt(n) -def slowfun(x, y): - """ - Rewrite slowfun_too_slow() in here so that the program produces the same - output, but completes quickly instead of taking ages to run. - """ - # Your code here +for i in range(1, 1000): + lookup_table[i] = inverse_root(i) +print(lookup_table[995]) -# Do not modify below this line! +# rainbow table +## hash common passwords ahead of time +## precomputed table for caching the output of cryptographic hash functions, +# usually for cracking password hashes -for i in range(50000): - x = random.randrange(2, 14) - y = random.randrange(3, 6) - print(f'{i}: {x},{y}: {slowfun(x, y)}') +# hashing function for pws should be slow \ No newline at end of file diff --git a/applications/markov/markov.py b/applications/markov/markov.py index 1d138db10..9cfe66356 100644 --- a/applications/markov/markov.py +++ b/applications/markov/markov.py @@ -1,13 +1,61 @@ import random +import numpy as np # Read in all the words in one go with open("input.txt") as f: words = f.read() + corpus = words.split() -# TODO: analyze which words can follow other words -# Your code here +def get_current_and_next(word_list): + # for each word in our word list + for i in range(len(word_list) - 1): + # iterate over without storing + # can produce a sequence of values + yield (word_list[i], word_list[i+1]) +current_and_next = get_current_and_next(corpus) -# TODO: construct 5 random sentences -# Your code here +d = {} +# grab the two values from the gen obj +for current_word, next_word in current_and_next: + # if current word is already a key + if current_word in d.keys(): + # add another 'next' word + d[current_word].append(next_word) + else: + # create a key where value is a list of next words + d[current_word] = [next_word] + +num_of_words = 100 + +# generate a start word +start_word = np.random.choice(corpus) +# create the first word of the chain +markov_chain = [start_word] +for i in range(num_of_words): + # access the values from the latest word's key in the dict + newest_word_choices = d[markov_chain[-1]] + # add a new word to the chain by grabbing one of the possible values + markov_chain.append(np.random.choice(newest_word_choices)) +print(' '.join(markov_chain)) + + +# sentences: + +""" +last to see how she began writing for him, and down on her sister, who was far as our fire in time without fifty dinners at me!" +she held the black kitten's fault entirely. +For the window with great curiosity to look as if we were playing just the poor King was in the King, so wide open! +All the boys getting larger and then she was nearly sure they had got all knots and had +""" + +# with 100 words + +""" +running after its face this led to know you know, I turned cold to find one. +"Blew--me--up," panted the King and fields, that there was all your feelings!" +There was so cold, and Alice said, because the tail just now, as she called out to wind blows-- +oh, that's very first thing down among the door of use, and, as different as soon as soon as if you're not been reduced at once! +Well, I can only two and then she could find that she went on the old nurse by the kitten had been rolling it was--'and if the King took +""" \ No newline at end of file diff --git a/applications/no_dups/no_dups.py b/applications/no_dups/no_dups.py index caa162c8c..3dcf2b149 100644 --- a/applications/no_dups/no_dups.py +++ b/applications/no_dups/no_dups.py @@ -1,5 +1,20 @@ def no_dups(s): - # Your code here + # split string into list of words + words = s.split() + # create an empty dict + word_dict = {} + # create an empty list for unique words + unique_words = [] + # for each word in list + for word in words: + # if it isn't in our dict + if word not in word_dict: + # add key/value pair + word_dict[word] = '_' + # append the new word to the list of unique ones + unique_words.append(word) + # return a str output of the unique words + return ' '.join(unique_words) diff --git a/applications/word_count/letter_count.py b/applications/word_count/letter_count.py new file mode 100644 index 000000000..fd303a115 --- /dev/null +++ b/applications/word_count/letter_count.py @@ -0,0 +1,50 @@ + + +# given a string, count how many times each letter occurs in it +# print by descending order, from the most common letter to the least common + +our_string = 'supercalifragilisticexpialidocious' + + +# UPER +# Understand +# What about spaces and special chars? +# ignore for now, just count alphabet letters + +# Plan +# loop and place in a dict +# use our Python list sorting methods to sort by descending order of the values not keys + +# E + +def letter_count(s): + our_dict = {} + + for letter in s: + if letter in our_dict: + our_dict[letter] += 1 + + else: + # ignore non-alphabetic characters + if letter.isalpha(): + our_dict[letter] = 1 + + return our_dict + +count_dict = letter_count(our_string) + +list_dict = list(count_dict.items()) + +list_dict.sort(reverse=True, key=lambda pair: pair[1]) +# sorted() + +v_set = set() +for k, v in list_dict: + if v not in v_set: + print(v, k) + v_set.add(v) + else: + print(' ', k) + +# stretch goal from Omid: print each v only once +# or do we want them all on one line, comma-separated? \ No newline at end of file diff --git a/applications/word_count/word_count.py b/applications/word_count/word_count.py index a20546425..95cbd4c34 100644 --- a/applications/word_count/word_count.py +++ b/applications/word_count/word_count.py @@ -1,5 +1,31 @@ def word_count(s): - # Your code here + '''A function that takes in a string and returns a + dictionary of words and their counts''' + characters_to_ignore = ['"', ":", ";" , ".", "-", "+", "=", "/", "[", "]", + "{", "}", "(", ")", ",", "*", "^", "&", "|", '\\'] + # for each character in input string + for character in s: + # if it's in the ignored characters + if character in characters_to_ignore: + # replace the value with whitepsace + s = s.replace(character, ' ') + # now, ready to make string a list by splitting string on whitespace + words = s.split() + # make all words in list lowercase + words = [word.lower() for word in words] + # create an empty dict + word_dict = {} + # for each word in our list of words + for word in words: + # if the word is already in the dict + if word in word_dict: + # increase the word count by 1 + word_dict[word] += 1 + # if the word isn't in dict + else: + # add it the dict + word_dict[word] = 1 + return word_dict diff --git a/collisions_resizing_vim.txt b/collisions_resizing_vim.txt new file mode 100644 index 000000000..05c0cf927 --- /dev/null +++ b/collisions_resizing_vim.txt @@ -0,0 +1,154 @@ +- hash function + array = hash table + - array full of linked lists + +Quick demo of handling a collision with a LL + +Index Chain +0 None +1 ('foo', 12) --> ('bar', 42) --> ('xyzzy', 99) +2 None +3 ('baz', 13) +4 None +5 None + + +put('foo', 12) # hash to 1 +put('baz', 13) # hash to 3 +put('bar', 23) # hash to 1 + +put('bar', 42) # overwrite the 23 + +put('xyzzy', 99) # hashes to 1 as well! + +get('bar') + +How do we do a get?? + How do we determine if it's the value we want if we're searching by the key? + A: store the key unhashed, and compare as we iterate/traverse down the linked list + +Put? + Check if key is in linked list, if so overwite, if not add new Node + +Delete? +delete('bar') + - find the matching pair of values + - point the previous node of that one to the next node of the found node + + +Linked Lists +- Singly linked, node.next +- Doubly linked, node.next and node.prev + +Node(next: 23_node, value: 12 ) ---> Node(next: None, value: 23) + +class SLL: + def __init__(self): + self.head = None + + def get(self, target_value): + # start at the head + node = self.head + while node is not None: + # check for the target value + if node.value == target_value: + return node + # move to next node + else: + node = node.next + + def delete(self, target_value): + # if it's head + # if LL is empty + + if not self.head: + return False + + if self.head.value == target_value: + self.head = self.head.next + + prev_node = self.head + cur_node = self.head.next + + while cur_node is not None: + if cur_node.value == target_value: + prev_node.next = cur_node.next + + else: + prev_node = cur_node + cur_node = cur_node.next + + +class DLL: + def __init__(self): + self.head = None + self.tail = None + + + + + +LOAD FACTOR AND RESIZING + +O(1) + +0 -> A +1 -> B +2 -> C +3 -> D -> F +4 -> E +5 + + +O(N) + +0 -> A +1 -> B --> G --> L --> M +2 -> C --> K +3 -> D --> H +4 -> E --> J +5 -> F --> I + +load factor +(number of elements) / (number of slots) +1.0 +> 2 + +Load factor < 0.7 + +If load factor < 0.2, hash table is underloaded, so array is bigger than you need + +How to resize? + +- How do we resize arrays? +-- make a new array, double the size of the old one +-- iterate down the old array, and copy every item over +-- so it's O(n) + + +- For a hash table: +-- double your backing array +-- iterate down the old array +--- and traverse down the linked list +--- then do a put (aka: hash the key, mod key, put into a node) + + + + + + +Checklist +- Go down checklist +- What took me the longest? +- run, right, fast +- binary search to debug +- Could I find what I was looking for? + +Vimium plugin +games to play vim + +- keyboard macros + + +BTW I have no special Vim plugins, I just jump around using standard Vim. + +And I could be faster, I forget to use paragraph jumps and so on. \ No newline at end of file diff --git a/hashtable/birthday_paradox.py b/hashtable/birthday_paradox.py new file mode 100644 index 000000000..2fec9a30d --- /dev/null +++ b/hashtable/birthday_paradox.py @@ -0,0 +1,39 @@ +import random +import hashlib + +# bcrypt +# hashlib + +# birthday paradox +# 'collision' of birthdays is more likely than our intuition + + +# can we avoid collisions by using a really big array? + +# sha256 always produces a unique output, but we mod its output, so we still get collisions +def our_hash(key, num_buckets): + key_bytes = f'{key}'.encode() + hashed_key = int(hashlib.sha256(key_bytes).hexdigest(), 16) + return hashed_key % num_buckets + +def how_many_before_collision(num_buckets): + hashed_keys = set() + tries = 0 + while True: + # make a bunch of random keys + key = random.random() + # hash them, modulo them by the number of buckets aka size of the array + hashed_key = our_hash(key, num_buckets) + + if hashed_key in hashed_keys: + break + + else: + # see how far we get before we have a collision + hashed_keys.add(hashed_key) + tries += 1 + + print(f'collision! Tries: {tries} before collision, that is {tries/num_buckets * 100}%') + +# 8, 16, 32 +how_many_before_collision(10000) \ No newline at end of file diff --git a/hashtable/caching.py b/hashtable/caching.py new file mode 100644 index 000000000..c11b9c871 --- /dev/null +++ b/hashtable/caching.py @@ -0,0 +1,55 @@ + +# Web client cache + +# "client" gets whatever URL we provide + +# this client should cache the web page + +# on first request, the client fetches the web page +# on subsequent requests, the client gives you what it previously fetched + +# why? +## speed +### especially for large pages or on a slow connection + +## avoid database hits +### don't overpay for services +### countries that charge by download + +# how to use hash tables to create a web client cache? +## (aka proxy server) + +## what should be the key, what should be the value? +### value: the returned HTML/JS/CSS - the dom +### key: fetch date?, or URL + +# thefacebook.com, google.com + +import urllib.request + +cache = {} +def web_client(URL): + + # check if the URL is in cache + if URL in cache: + print('found locally, saving time!!') + return cache[URL] + + # otherwise, fetch and put in cache + else: + print("did not find, going out over the interwebs") + response = urllib.request.urlopen(URL) + + data = response.read() + + response.close() + + cache[URL] = data + + return cache[URL] + +web_client('https://www.google.com') +web_client('https://www.google.com') + +# what if the web page changes? data in cache would be stale! +# Won't cache grow without end? \ No newline at end of file diff --git a/hashtable/hash_table_notes_day_1.py b/hashtable/hash_table_notes_day_1.py new file mode 100644 index 000000000..29aeee2bc --- /dev/null +++ b/hashtable/hash_table_notes_day_1.py @@ -0,0 +1,174 @@ + +# lorem ipsum +my_arr = ["Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit" ] + +# search for an element, for example consectetur + +# O(n) +# for word in my_arr: +# if word == 'consectetur': +# return True + +# if 'elit' in my_arr: +# return True + +# O(log n) +# sort array, then run binary search on it + +# what if we could find the index of the element in O(1) time? +# then we could take 1 more step to access the element: my_arr[5] + +# we would have O(1) search + +# we would like a function that returns the index + +# Hash function or hashing function + +## do you have to track where you've put things in the underlying array? + + +# Hash functions +# Write a function that takes a string and turns it into a number + +# hash the string with a hashing function....and you get back a hash + +my_arr = [None] * 8 + +# it's fast +# deterministic +# can't get the input from the output + +def len_hash(s): + return len(s) # for this example, we will use the length of the word as the index + +# Use the hashing function to put the word 'hello' into the array +hello_number = len_hash('hello') # use hashing function to get an index +my_arr[hello_number] = 'hello' + + +## some time passes... +hello_number = len_hash('hello') # use hashing function to find the index +my_arr[hello_number] # pull out the word we want + +# what about words of the same length? +world_number = len_hash('world') +my_arr[world_number] = 'world' + + +world_number = len_hash('world') +my_arr[world_number] + +# what about long words? +long_word = 'supercalifragilisticexpialidocious' +long_word_hash = len_hash(long_word) + +long_word_idx = long_word_hash % len(my_arr) + +my_arr[long_word_idx] = long_word + +## how to fix this? +### dynamic array? + +### use modulo, aka 'mod the number' + + +# the problem with arrays: search is slow +# How to get faster? +# To reach O(1), make a magic function to return the index of the target word in O(1) time +# made simple hash function +# make the hash function and array play nice together + +# Let's improve our hash function, by making it more unique + +## add up the letters +### assign a number to every letter +### ASCII has already done this + +def add_hash(s): + total = 0 + for letter in s: + total += ord(letter) + return total + +### won't work for anagrams! +#### dad vs add + +# UTF-8, ASCII on steroids +# encode +def utf8_hash(s): + total = 0 + string_bytes = s.encode() + + for b in string_bytes: + total += b + return total + +# we can do math on the bytes of the string! + +my_arr = [None] * 10000 + +def put(key, value): + # turn the key into an index + hashed_string = utf8_hash(key) + idx = hashed_string % len(my_arr) + + # put the value at that index in our array + my_arr[idx] = value + +put('hello', 'hello world') + +# what is the time complexity here? +## if you measure by the length of the key, O(n) +## if you measure by the number of slots / length of array, then it's O(1) + +def get(s): + hashed_string = utf8_hash(s) # turn string into number + + idx = hashed_string % len(my_arr) # turn number into index + + value = my_arr[idx] # go and access element at that index + + return value + +get('hello') ## get the key + + +# Delete: find the value, then set to None + +# Put +## 1. Hash our string/key, get out a number +## 2. Take this number and modulo it by the length of the array +## 3. This new number can be used as an index, so put the value at that index in our array + +# Get +## 1. Hash our string/key, string --> number +## 2. Mod this number by length of array +## 3. Use this modded number / index to get the value there + + + + +## Common use-cases? +### hashing functions: encryption +### Fast O(1) lookup of values using a key to find it + +## Easy to think about time complexity for arrays vs objects/dictionaries + +# if x in my_data_structure: ## O(n) for an array, runs get() --> O(1) for a hash table + +# look up user profile from username, 1billion users + + + + + # Couldn't we end up with the wrong modulo if we've increased the size of the array between put and get? + # Increasing the size of the array which we're using with our hash table? + # Solving collisions?? + ### TO BE CONTINUED.... + +"""Notes with Beej + +what they solve - look at each item in an index, first, second, third - Linear searching - that's O(n) - Standard array search +if we double the list - twice as long to search, triple = 3 times as long + +""" \ No newline at end of file diff --git a/hashtable/hashtable-in-class.py b/hashtable/hashtable-in-class.py new file mode 100644 index 000000000..e25fdd259 --- /dev/null +++ b/hashtable/hashtable-in-class.py @@ -0,0 +1,273 @@ +class HashTableEntry: + """ + Linked List hash table key/value pair + """ + def __init__(self, key, value): + self.key = key + self.value = value + self.next = None + + +# Hash table can't have fewer than this many slots +MIN_CAPACITY = 8 + + +class HashTable: + """ + A hash table that with `capacity` buckets + that accepts string keys + + Implement this. + """ + + def __init__(self, capacity): + + self.capacity = max(capacity, MIN_CAPACITY) + self.storage = [None] * self.capacity + + self.load = 0 + + + def get_num_slots(self): + """ + Return the length of the list you're using to hold the hash + table data. (Not the number of items stored in the hash table, + but the number of slots in the main list.) + + One of the tests relies on this. + + Implement this. + """ + return len(self.storage) + + + def get_load_factor(self): + """ + Return the load factor for this hash table. + + Implement this. + """ + # Your code here + +# hashing functions used in: +# git +# cryptocurrencies +# hash tables +# store passwords + +# choose between hashing functions +## some are fast, some are slow + +# "Can a hash be reversed?" +# "How/Why not?" + +## What's reversing a hash mean? +## take a hash number and try to get back to the string it was made from + +## p@$$w0rd +## 0x23283287ad878f983efc + +# deterministic +# irreversible + +# an attacker can't reverse, but could try hashing common passwords + +## for a hash table, you want a fast function --> O(1) +## for passwords, you want a slow function + + + + +# Different strategies to handle collisions? +## chaining: array of linked lists, with one LL per index, each node.next points to the second element +## Array of arrays, with one array per index, just append +## Disallow collisions? +## Open addressing. Linear probing, quadratic probing. [None, 'hello', 'world', None] + + + + + + def fnv1(self, key): + """ + set hash to 0? + maintain a total? + + - start hash at some large number(FNV_offset_basis) + - the hashed variable maintains our total + + some_big_prime * some_other_big_prime = some_mysterious_number + + + Comp Arch - bitwise operations, including XOR + + 0101010101010 + ^ 1101101011001 + ------------- + 1000111110011 + + + """ + FNV_offset_basis = 14695981039346656037 + FNV_prime = 1099511628211 + hashed = FNV_offset_basis + + bytes_to_hash = key.encode() + + for byte in bytes_to_hash: + hashed = hashed * FNV_prime + + hashed = hashed ^ byte + + return hashed + + def djb2(self, key): + """ + DJB2 hash, 32-bit + + Implement this, and/or FNV-1. + + unsigned long + hash(unsigned char *str) + { + unsigned long hash = 5381; + int c; + + while (c = *str++) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return hash; + } + Left bitshifting + Left shift + | + 0101010101000000 + | + + Why 5381 and * 33? because they work! + + What's "work" - what makes these good? + - irreversible + - nice distribution, spreads them out over the array --> minimizes collisions + + """ + hashed = 5381 + + bytes_to_hash = key.encode() + + for byte in bytes_to_hash: + hashed = ((hashed << 5) + byte) + # hashed = ((hashed * 33) + byte) + + return hashed + + + def hash_index(self, key): + """ + Take an arbitrary key and return a valid integer index + between within the storage capacity of the hash table. + """ + #return self.fnv1(key) % self.capacity + return self.djb2(key) % self.capacity + + def put(self, key, value): + """ + self.put(key, None) + -- will break our count!! + + Store the value with the given key. + + Hash collisions should be handled with Linked List Chaining. + + Implement this. + """ + # hash the key - self.hash_index will modulo it + idx = self.hash_index(key) + + # check for a collision + if self.storage[idx] != None: + print('warning! collision!!!') + + # insert the value at that location + self.storage[idx] = value + + self.load += 1 + + + def delete(self, key): + """ + Remove the value stored with the given key. + + Print a warning if the key is not found. + + Implement this. + + """ + # hash the key to find index + idx = self.hash_index(key) + + if self.storage[idx] == None: + print('Warning! no key!!!') + + else: + self.storage[idx] = None + + self.load -= 1 + + + def get(self, key): + """ + Retrieve the value stored with the given key. + + Returns None if the key is not found. + + Implement this. + """ + # Your code here + + + def resize(self, new_capacity): + """ + Changes the capacity of the hash table and + rehashes all key/value pairs. + + Implement this. + """ + # Your code here + + + +if __name__ == "__main__": + ht = HashTable(8) + + ht.put("line_1", "'Twas brillig, and the slithy toves") + ht.put("line_2", "Did gyre and gimble in the wabe:") + ht.put("line_3", "All mimsy were the borogoves,") + ht.put("line_4", "And the mome raths outgrabe.") + ht.put("line_5", '"Beware the Jabberwock, my son!') + ht.put("line_6", "The jaws that bite, the claws that catch!") + ht.put("line_7", "Beware the Jubjub bird, and shun") + ht.put("line_8", 'The frumious Bandersnatch!"') + ht.put("line_9", "He took his vorpal sword in hand;") + ht.put("line_10", "Long time the manxome foe he sought--") + ht.put("line_11", "So rested he by the Tumtum tree") + ht.put("line_12", "And stood awhile in thought.") + + print("") + + # Test storing beyond capacity + for i in range(1, 13): + print(ht.get(f"line_{i}")) + + # Test resizing + old_capacity = ht.get_num_slots() + ht.resize(ht.capacity * 2) + new_capacity = ht.get_num_slots() + + print(f"\nResized from {old_capacity} to {new_capacity}.\n") + + # Test if data intact after resizing + for i in range(1, 13): + print(ht.get(f"line_{i}")) + + print("") \ No newline at end of file diff --git a/hashtable/hashtable.py b/hashtable/hashtable.py index 0205f0ba9..dd6b09227 100644 --- a/hashtable/hashtable.py +++ b/hashtable/hashtable.py @@ -19,10 +19,11 @@ class HashTable: Implement this. """ - + # updated from class def __init__(self, capacity): - # Your code here - + self.capacity = capacity + self.table = [None] * capacity + self.count = 0 def get_num_slots(self): """ @@ -34,7 +35,7 @@ def get_num_slots(self): Implement this. """ - # Your code here + return len(self.table) def get_load_factor(self): @@ -43,7 +44,9 @@ def get_load_factor(self): Implement this. """ - # Your code here + # number of items divided by number of buckets + # try to keep between 20 - 70% + return self.count / self.get_num_slots() def fnv1(self, key): @@ -52,9 +55,17 @@ def fnv1(self, key): Implement this, and/or DJB2. """ - - # Your code here - + # FNV1 parameters + offset_basis = 14695981039346656037 + FNV_prime = 1099511628211 + # hash function, alternate version + hash = offset_basis + key_bytes = key.encode() + # for each byte of data to be hashed + for byte in key_bytes: + hash = hash * FNV_prime + hash = hash ^ byte + return hash def djb2(self, key): """ @@ -62,7 +73,15 @@ def djb2(self, key): Implement this, and/or FNV-1. """ - # Your code here + # make hash == to 5381 + hash = 5381 + # encode + byte_array = key.encode() + for arr in byte_array: + # hash math + hash = ((hash << 5) + arr) + # hash = ((hash * 33) + arr) + return hash def hash_index(self, key): @@ -73,6 +92,7 @@ def hash_index(self, key): #return self.fnv1(key) % self.capacity return self.djb2(key) % self.capacity + def put(self, key, value): """ Store the value with the given key. @@ -81,7 +101,37 @@ def put(self, key, value): Implement this. """ - # Your code here + index = self.hash_index(key) + new_node = HashTableEntry(key, value) + existing_node = self.table[index] + self.count += 1 + if existing_node: + last_node = None + while existing_node: + if existing_node.key == key: + existing_node.value = value + return + last_node = existing_node + existing_node = existing_node.next + last_node.next = new_node + else: + self.table[index] = new_node + if self.get_load_factor() > 0.7: + return self.resize(self.capacity * 2) + + + # HANDLING COLLISSIONS WITH A LINKED LIST + + # if there's None, just make a node. That's the LL head + ## increment your load by 1 + + # if there's already a node, iterate down and check keys + # if you find the key, overwrite the value + ## don't increment the load + ### stop incrementing and return + + # if you reach the end, then add a node at the head or the tail + ## increment the load by 1 def delete(self, key): @@ -92,7 +142,24 @@ def delete(self, key): Implement this. """ - # Your code here + delete_at_index = self.hash_index(key) + existing_node = self.table[delete_at_index] + + if existing_node: + self.count -= 1 + last_node = None + while existing_node: + if existing_node.key == key: + if last_node: + last_node.next = existing_node.next + else: + self.table[delete_at_index] = existing_node.next + last_node = existing_node + existing_node = existing_node.next + if self.get_load_factor() < 0.2: + return self.resize(int(self.capacity / 2)) + else: + print("Key not found") def get(self, key): @@ -103,8 +170,16 @@ def get(self, key): Implement this. """ - # Your code here + index = self.hash_index(key) + existing_node = self.table[index] + if existing_node: + while existing_node: + if existing_node.key == key: + return existing_node.value + existing_node = existing_node.next + else: + return None def resize(self, new_capacity): """ @@ -113,8 +188,22 @@ def resize(self, new_capacity): Implement this. """ - # Your code here - + if new_capacity > 8: + self.capacity = new_capacity + else: + self.capacity = 8 + old_array = self.table + self.table = [None] * self.capacity + old_size = self.count + + current_node = None + + for entry in old_array: + current_node = entry + while current_node != None: + self.put(current_node.key, current_node.value) + current_node = current_node.next + self.count = old_size if __name__ == "__main__": diff --git a/hashtable/indexing.py b/hashtable/indexing.py new file mode 100644 index 000000000..3a70b4f0d --- /dev/null +++ b/hashtable/indexing.py @@ -0,0 +1,37 @@ + + +records = [ + ("Tara", "Web"), + ("Kyle", "Web"), + ("Adrian", "Web"), + ("Janessa", "Web"), + ("Mike", "Web"), + ("Cai", "DS"), + ("Chris", "DS"), + ("Craig", "iOS") +] + +# how could we show in O(1) time everyone in a particular track? + +# build an index, or indexing on an attribute + +# index on the track: make the track the key, have as value a list, append the names to the list + +def build_index(records): + idx = {} + for name, track in records: + if track in idx: + idx[track].append(name) + + else: + idx[track] = [name] + + return idx + +# index the data on an attribute: rooms in a house, pools + +indexed_records = build_index(records) + +print(indexed_records['DS']) +print(indexed_records['Web']) +print(indexed_records['iOS']) \ No newline at end of file diff --git a/hashtable/memoization.py b/hashtable/memoization.py new file mode 100644 index 000000000..fba895549 --- /dev/null +++ b/hashtable/memoization.py @@ -0,0 +1,38 @@ + +# memoization, closely related to dynamic programming +## DP: top down, break the problem up as you +## reuse previous results + +## key is what you have, value is what you calculate + + +# fibonacci sequence +## a function that returns the n-th item in the fibonacci sequence +## golden proportion + +## 0 1 1 2 3 5 8 13 21 34 55 89 +### Kanban board: card holds a feature, "make this button" +### 1 2 3 5 8 13 21 + +# let's do it recursively + +# need base case +# progress toward base case + +cache = {} +def fib(n): + if n == 0 or n == 1: + return n + + else: + if n in cache: + return cache[n] + else: + cache[n] = fib(n - 1) + fib(n - 2) + + return cache[n] + +print(fib(3)) # should be 2 +print(fib(6)) # should be 8 +print(fib(11)) # should be 89 +print(fib(1050)) \ No newline at end of file diff --git a/hashtable/sorting.py b/hashtable/sorting.py new file mode 100644 index 000000000..1775690ec --- /dev/null +++ b/hashtable/sorting.py @@ -0,0 +1,76 @@ + +# does a hash table preserve order? + +""" +hash_table = HashTable() + +hash_table.storage = [Node('key3', 'v3'), None, None, Node('key1', 'v1')] + +hash_table.put('key1', 'v1') +hash_table.put('key2', 'v2') +hash_table.put('key3', 'v3') + +hash_table.storage.sort() + +hash_table.get('key1') +""" + +# hash the key, get the index, looks there...???? + +arr = [1, 2, 3] +arr.append(1) +arr.append(2) +arr.append(3) + +# why doesn't a hash table keep things in order, the way an array does? +# the hash function takes the key and returns a random index + +# sets, dictionaries, object or hash maps + +# can you sort a hash table (or dictionary/object/hash map)? +## go to the index, sort the linked list? + +arr.sort() + +# in Python, what if we got a list based on the dictionary? +my_dict = { + 'a': '1', + 'f00': 'izzy', + 'qux': 'bar', +} +# a list-like object - actually an iterator +my_dict.items() + +dict_list = list(my_dict.items()) + +# sort by default goes in ascending order, aka normal alphabetical +# also by default uses the first item in each tuple to sort + +dict_list.sort(reverse=True) + +print(dict_list) + +# print in ascending order, sorted by value +dict_list.sort(key=lambda tuple_pair: tuple_pair[1]) +print(dict_list) + +""" +[('a', 1), ('f00', 'bar'), ('qux', 'izzy')] + + +JS +x => x * x + +lambda x, y: x * y + +HOF: a function that takes a function +in functional programming (FP), we don't work in place +Instead always returns a new data structure + +"pure function" has no side effects + +map(lambda x: x * 2, [1, 2, 3, 4]) + +list(map(lambda x: x * 2, [1, 2, 3, 4])) +[2, 4, 6, 8] +""" \ No newline at end of file diff --git a/hashtable/trans_table.py b/hashtable/trans_table.py new file mode 100644 index 000000000..8fc406dde --- /dev/null +++ b/hashtable/trans_table.py @@ -0,0 +1,79 @@ + + +# transposition table + +# you have data to transform from one form into another + +# transposition cipher +# Caesar cipher --> 'rotate' the letter + +# given a string, build a new string by looking up each letter + +encode_table = { + 'A': 'H', + 'B': 'Z', + 'C': 'Y', + 'D': 'W', + 'E': 'O', + 'F': 'R', + 'G': 'J', + 'H': 'D', + 'I': 'P', + 'J': 'T', + 'K': 'I', + 'L': 'G', + 'M': 'L', + 'N': 'C', + 'O': 'E', + 'P': 'X', + 'Q': 'K', + 'R': 'U', + 'S': 'N', + 'T': 'F', + 'U': 'A', + 'V': 'M', + 'W': 'B', + 'X': 'Q', + 'Y': 'V', + 'Z': 'S' +} + +# make a function to encode a string +# iterate through the string we're given +# for every letter, look up its encoding (its transformation) +# build a new string + +first_string = 'hello' + +def encode(old_string): + new_string = '' + + for letter in old_string.upper(): + new_string = new_string + encode_table[letter] + + return new_string + +herrow = encode(first_string) +print(herrow) + +# make a decode table so we can also decode our super secret messages +# with encode table, keys --> values, values --> keys + +## iterate through encode table +### for each key, value, add to a new dictionary with value, key +decode_table = {} +for key, value in encode_table.items(): + decode_table[value] = key + + +def decode(old_string): + new_string = '' + + for letter in old_string.upper(): + new_string = new_string + decode_table[letter] + + return new_string + + +decoded = decode(herrow) +print(decoded) \ No newline at end of file