# ASTR 598 Lecture 8: Friday Feb. 26, 2016
# Hash functions, tables

In [1]:
# to be python 3 compatible
from __future__ import print_function, division

import numpy as np
import math

# Notes:

- hash functions -> cryptography
- hash tables (associative arrays) -> sparse multidimensional arrays. ex: python dicts
    - insert and lookup are O(1) on average (because the table is indexed like an array)
    - really useful for sparse, high-dimensional data
- in python, can use the hashlib package to sample different hash methods
    - we'll roll our own for pedagogical purposes (it won't be cryptographically secure, but that's fine here)
- ex: git heavily uses hash codes

- Hash function
    - maps arbitrary input data x to fixed length data, like a 16 char string
        - h(x) -> string where, ideally, if (x != y) then h(x) != h(y)
        - conversely, if h(x) == h(y), then x == y (very, very likely for a good hash code)
        
    - for our purposes, we're going to do the following: h(x) -> int
    
- Let's say we have a key = (x1, x2, x3, x4) where x = [x1, x2, x3, x4] with x stored in a 1d table
- hash function will do something like mod table_size, but this can give us collisions (different keys mapping to the same slot)
    - for our purposes, good idea to use a prime number for table_size
- so we'll use a linked list to resolve collisions
    - This method is called separate chaining
    
- hash table functions:
    - put(k,v)
        - for a key, like (1,2,3,6), with associated value v
    - get(k)
        - given k, retrieve v
        
    - equal(k1, k2)
        if (k1.x1 == k2.x1) && (k1.x2 == k2.x2) ...
            return True
        else
            return False
            
- would want a key class (containing x), a value class, a node class (containing key, value, next), and of course a hash table class

- put(key k, value v):
    - equivalent to m[x1][x2][x3][x4] = v for our typical example

# Code:
Redefine the Node and LinkedList classes, since each element of the hash table holds a linked list to handle any potential collisions

In [5]:
class Node(object):
    """One link of a singly linked list: a payload and a pointer onward."""

    def __init__(self, data=None, next_node=None):
        # The two attributes are independent; pointer first, payload second.
        self.next_node = next_node
        self.data = data

    def __repr__(self):
        # A node displays exactly as its payload does.
        payload = self.data
        return str(payload)
    
class LinkedList(object):
    """Singly linked list that tracks its head node, tail node and size.

    The list is empty exactly when ``head`` and ``tail`` are both ``None``;
    when it holds a single element they refer to the same node.
    """

    def __init__(self, head=None, tail=None, size=0):
        """Build a list from up to two optional end nodes.

        Args:
            head: Node to use as the first element.  ``None``, or a node whose
                ``data`` is ``None``, counts as "not supplied".
            tail: Node to use as the last element (same convention as head).
            size: Accepted for backward compatibility only; the size is always
                recomputed from the nodes that were actually supplied.
        """
        # Defaults are None rather than Node() instances: default values are
        # evaluated once at definition time and would be shared by every list.
        # Identity tests against None also keep payloads with a custom __eq__
        # (e.g. arrays) from breaking the check.
        has_head = head is not None and head.data is not None
        has_tail = tail is not None and tail.data is not None

        if has_head and has_tail and head is not tail:
            # Two distinct nodes: link them as a two-element list.
            head.next_node = tail
            tail.next_node = None
            self.head = head
            self.tail = tail
            self.size = 2
        elif has_head or has_tail:
            # Exactly one usable node (or the same node passed twice): it is
            # both the head and the tail.
            only = head if has_head else tail
            only.next_node = None
            self.head = only
            self.tail = only
            self.size = 1
        else:
            self.head = None
            self.tail = None
            self.size = 0

    # Class methods for inserting new nodes
    def insert_at_tail(self, value):
        """Append ``value`` as the new last element."""
        new_tail = Node(value, None)
        if self.head is None:
            # Empty list: the new node is also the head.
            self.head = new_tail
        else:
            self.tail.next_node = new_tail
        self.tail = new_tail
        self.size += 1

    def insert_at_head(self, value):
        """Prepend ``value`` as the new first element."""
        # The new node points at the current head (None for an empty list).
        new_head = Node(value, self.head)
        if self.head is None:
            # Empty list: the new node is also the tail.
            self.tail = new_head
        self.head = new_head
        self.size += 1

    # Delete head and return value
    def delete_at_head(self):
        """Remove the first element and return its data (None if empty)."""
        if self.head is None:
            return None

        removed = self.head
        self.head = removed.next_node
        if self.head is None:
            # The last node was removed; drop the tail reference too so the
            # list is recognised as empty by later inserts.
            self.tail = None
        self.size -= 1
        return removed.data

    # Delete tail and return value
    # Slow: O(~n)
    def delete_at_tail(self):
        """Remove the last element and return its data (None if empty).

        The list is singly linked, so the node before the tail has to be
        found by walking from the head.
        """
        if self.head is None:
            return None

        # Single element: the list becomes empty.
        if self.head.next_node is None:
            value = self.head.data
            self.head = None
            self.tail = None
            self.size -= 1
            return value

        # Walk to the second-to-last node (the one whose successor ends the
        # chain), then cut the last node off.
        before_last = self.head
        while before_last.next_node.next_node is not None:
            before_last = before_last.next_node

        value = before_last.next_node.data
        before_last.next_node = None
        self.tail = before_last
        self.size -= 1
        return value

    def print_self(self):
        """Print the elements from head to tail, followed by the size."""
        node = self.head
        print("Head")

        while node is not None:
            print(node.data)
            # The "Tail" marker goes right after the final element.
            if node.next_node is None:
                print("Tail")
            node = node.next_node
        print("Size:", self.size)

In [10]:
# Given the 4 values for our data struct, construct a hash_code
def hash_code(x1, x2, x3, x4, key_prime=101):
    """Combine four integer indices into a single integer hash code.

    The indices are treated as the digits of a number written in base
    ``key_prime``, the same way 1234 = 1*10^3 + 2*10^2 + 3*10^1 + 4:

        h = x1*p^3 + x2*p^2 + x3*p + x4

    Distinct keys therefore give distinct codes as long as every index
    satisfies 0 <= x < key_prime.

    Args:
        x1, x2, x3, x4: The four components of the key.
        key_prime: Base of the positional encoding.  Pick the least prime
            greater than the length of a dimension; the default 101 is for
            a 100^4 size matrix.

    Returns:
        The integer hash code of the key.
    """
    # Horner's rule: each index gets its own power of key_prime (x3 is
    # weighted by p, not p^2, so it cannot collide with x2).
    return ((x1 * key_prime + x2) * key_prime + x3) * key_prime + x4