In [9]:
def htable(nbuckets):
    """
    Create an empty hashtable represented as a list of buckets.

    nbuckets is the number of buckets to create. Every bucket is its own
    fresh empty list, so appending to one bucket never affects another.
    """
    buckets = []
    for _ in range(nbuckets):
        buckets.append([])
    return buckets

In [39]:
# Example: build an empty hashtable with 10 buckets.
htable(10)

[[], [], [], [], [], [], [], [], [], []]

In [10]:
def hashcode(o):
    """
    Compute the hash code used to pick a bucket for key o.

    An int hashes to itself. A str is hashed with a base-31 polynomial:
    starting from 0, every character updates the running value to
    running * 31 + ord(character), so the empty string hashes to 0.
    Any other type (checked by exact type, so subclasses such as bool
    are not treated as int) yields None.
    """
    kind = type(o)
    if kind == str:
        running = 0
        for ch in o:
            # Multiply-then-add makes the result depend on character order.
            running = 31 * running + ord(ch)
        return running
    if kind == int:
        return o
    return None  # unsupported key type: no hash code

In [37]:
# An integer key hashes to itself.
hashcode(42)

42

In [38]:
# A string key is folded character by character (h = h*31 + ord(c)).
hashcode("Michael Ruddy")

63401902953007148893

In [11]:
def htable_put(table, key, value):
    """
    Perform the equivalent of table[key] = value on a list-of-buckets table.

    The bucket is chosen with hashcode(key) % len(table). If the bucket
    already holds an association for key, that association is replaced in
    place, so the key keeps its original position within the bucket (the
    same ordering rule a regular dict follows when a key is updated).
    Otherwise (key, value) is appended to the end of the bucket.

    key must be a type hashcode() supports (int or str); for other types
    hashcode() returns None and the modulo raises TypeError.
    value can be anything. Returns None.
    """
    bucket = table[hashcode(key) % len(table)]  # bucket responsible for this key

    for position, association in enumerate(bucket):
        if association[0] == key:
            # Overwrite in place rather than remove + append, so updating a
            # key does not move it to the end of the bucket.
            bucket[position] = (key, value)
            return
    bucket.append((key, value))  # key not present yet: new association

In [29]:
# Build a small 5-bucket table and store three string keys.
# The values are deliberately of different types (str and tuple).
table = htable(5)
htable_put(table, "a", "123")
htable_put(table, "b", "4")
htable_put(table, "g", ("tuple", "tuple2"))

In [12]:
def htable_get(table, key):
    """
    Return the value associated with key in table, or None if key is absent.

    The bucket is selected with hashcode(key) % len(table) and scanned in
    order; the value of the first association whose key equals key is
    returned.
    """
    slot = hashcode(key) % len(table)
    # next() stops at the first matching association, or falls back to None.
    return next((entry[1] for entry in table[slot] if entry[0] == key), None)

In [30]:
# Look up the value stored under "a".
htable_get(table, "a")

'123'

In [31]:
# Look up the value stored under "b".
htable_get(table, "b")

'4'

In [32]:
# Look up the value stored under "g" (a tuple).
htable_get(table, "g")

('tuple', 'tuple2')

In [40]:
# Putting a value for a key that already exists replaces the old value;
# "a" is written twice here, so only the last value ("xyz") remains.
# "b" is not touched and keeps its original value.
htable_put(table, "a", "apple")
htable_put(table, "a", "xyz")
htable_put(table, "g", ["list", "of", "words"])

In [41]:
# "a" now maps to the most recently stored value.
htable_get(table, "a")

'xyz'

In [42]:
# "b" was never overwritten, so its value is unchanged.
htable_get(table, "b")

'4'

In [43]:
# "g" now maps to the list that replaced the original tuple.
htable_get(table, "g")

['list', 'of', 'words']

In [13]:
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes.

    This does the exact same thing as create_index() except that it uses
    the list-of-buckets htable (4011 buckets) instead of a dict.

    files is a sequence of filenames; a document's index is its position
    in files. For each file, the distinct words produced by
    words(get_text(filename)) are mapped to the set of positions of the
    files containing them.

    Returns the list-of-buckets hashtable representation.
    """
    index = htable(4011)

    # enumerate() supplies the document index directly instead of searching
    # files with files.index() once per word.
    for doc_id, filename in enumerate(files):
        for word in set(words(get_text(filename))):
            doc_ids = htable_get(index, word)
            if doc_ids is None:
                htable_put(index, word, {doc_id})  # first document with this word
            else:
                # The stored set is mutated in place, so no second put is needed.
                doc_ids.add(doc_id)
    return index

In [14]:
def myhtable_index_search(files, index, terms):
    """
    Return the filenames of the documents that contain every term in terms.

    This does the exact same thing as index_search() except that it uses
    the htable, i.e. htable_get(index, w) rather than index[w].

    files is the sequence of filenames the index was built from, index is
    the list-of-buckets hashtable mapping word -> set of document indexes,
    and terms is an iterable of search words.

    Returns a list of filenames ordered by document index. The list is
    empty when terms is empty, when any term is missing from the index, or
    when no document contains all of the terms.
    """
    matches = None  # None means "no term processed yet"; distinct from an empty set

    for term in terms:
        doc_ids = htable_get(index, term)
        if not doc_ids:
            # A term that appears in no document rules out every document.
            return []
        if matches is None:
            matches = set(doc_ids)  # copy so the index's own set is never aliased
        else:
            matches = matches.intersection(doc_ids)
        if not matches:
            # Stop here: an empty intersection must not be restarted by the
            # next term's matches.
            return []

    if not matches:
        return []
    return [files[doc_id] for doc_id in sorted(matches)]

In [15]:
def htable_buckets_str(table):
    """
    Return a string representing the various buckets of this table.
    The output looks like:
        0000->
        0001->
        0002->
        0003->parrt:99
        0004->
    where parrt:99 indicates an association of (parrt,99) in bucket 3.

    Each line starts with the bucket index zero-padded to four digits
    (wider indexes are printed in full). Multiple associations in one
    bucket are separated by ', ' in bucket order. Every line, including
    the last, ends with a newline.
    """
    lines = []
    for bucket_index, bucket in enumerate(table):
        # Render every association, including ones whose key or value is
        # falsy (0, '', empty containers); only empty placeholder entries
        # are skipped.
        pairs = [
            str(association[0]) + ':' + str(association[1])
            for association in bucket
            if association
        ]
        # Pad the index itself so bucket 10 prints as 0010, not 00010.
        lines.append('{:04d}->'.format(bucket_index) + ', '.join(pairs))
    return '\n'.join(lines) + '\n'

In [16]:
def htable_str(table):
    """
    Render the whole table in a dict-like form such as {parrt:99}.

    Associations are written as key:value using str() on both parts and
    joined with ', '. They appear in bucket order, and within a bucket in
    the order the associations are stored there. An empty table renders
    as {}.
    """
    rendered = [
        ':'.join((str(entry[0]), str(entry[1])))
        for bucket in table
        if bucket
        for entry in bucket
    ]
    return '{' + ', '.join(rendered) + '}'