# Hash Tables
- Quick insertion and search
- Hash function is the most important part of this DS
    - Maps keys to buckets
    - Depends on the range of key values and the number of buckets
    - If our hash function is a perfect 1-1 mapping there will never be a collision
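        - However, collisions are usually unavoidable in practice; when a bucket holds many keys, a height-balanced BST may be required to keep lookups inside it fast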
    - Collision resolution strategies
        1. Separate Chaining: Keys that hash to the same index are stored together in one bucket
        2. Open Addressing: Whenever there is a collision, we keep probing the main space with a certain strategy until a free slot is found
        3. 2-Choice Hashing: Use of two hash functions
        
## Sections
- Design a Hash Table (D)
- Practical Apps
    1. Hash Set (D)
    2. Hash Map (D)
    3. Design the Key
- Conclusion

## Useful Python Tips

In [5]:
# Python's built-in set is a ready-made hash set
hashset = set()

# Insert a key
hashset.add(1)

# Membership test with `in`
print(f'2 in hashset: {2 in hashset}')

# Delete a key (set.remove raises KeyError when the key is absent)
hashset.remove(1)

2 in hashset: False


## Background

In [9]:
from collections import defaultdict

In [1]:
class TreeNode:
    """Binary-tree node: a stored value plus links to a left and a right child."""

    def __init__(self, value):
        # A freshly created node is a leaf, so neither child is attached yet.
        self.left = self.right = None
        self.val = value

class Node:
    """Singly linked list node: a payload and a reference to the node after it."""

    def __init__(self, value, nextNode=None):
        # nextNode defaults to None, which marks the end of the list.
        self.next = nextNode
        self.value = value

## Designs
### Hash Set
- Use of a facade design pattern for the bucket class, with three interfaces: exists, insert, delete
- Notice that the Hash Set doesn't need to know what the bucket instance type is. Either a LL or BST bucket design would work

In [2]:
class Bucket_BSTree:
    """Bucket that stores its values in a binary search tree.

    Exposes the ``insert`` / ``delete`` / ``exists`` facade that
    ``MyHashSet`` calls on its buckets, delegating to the recursive BST
    routines below. The tree is not rebalanced on insert or delete.
    """

    def __init__(self):
        # Root of the tree; None while the bucket is empty.
        self.root = None

    # ---- bucket facade used by MyHashSet ---------------------------------

    def insert(self, value):
        """Add ``value`` to the bucket; a value already present is ignored."""
        self.root = self.insertIntoBST(self.root, value)

    def delete(self, value):
        """Remove ``value`` from the bucket; does nothing if it is absent."""
        self.root = self.deleteNode(self.root, value)

    def exists(self, value):
        """Return True if ``value`` is stored in the bucket, else False."""
        return self.searchBST(self.root, value) is not None

    # ---- underlying BST operations ---------------------------------------

    def searchBST(self, root: TreeNode, val: int) -> TreeNode:
        """Return the node holding ``val`` in the subtree at ``root``, or None."""
        if root is None or val == root.val:
            return root

        return self.searchBST(root.left, val) if val < root.val \
            else self.searchBST(root.right, val)

    def insertIntoBST(self, root: TreeNode, val: int) -> TreeNode:
        """Insert ``val`` into the subtree at ``root`` and return its root.

        An empty subtree yields a new single-node tree; a value that is
        already present leaves the tree unchanged.
        """
        if not root:
            return TreeNode(val)

        if val > root.val:
            # insert into the right subtree
            root.right = self.insertIntoBST(root.right, val)
        elif val == root.val:
            return root
        else:
            # insert into the left subtree
            root.left = self.insertIntoBST(root.left, val)
        return root

    def successor(self, root):
        """
        One step right and then always left

        Returns the smallest value in the right subtree of ``root``.
        ``root.right`` must not be None.
        """
        root = root.right
        while root.left:
            root = root.left
        return root.val

    def predecessor(self, root):
        """
        One step left and then always right

        Returns the largest value in the left subtree of ``root``.
        ``root.left`` must not be None.
        """
        root = root.left
        while root.right:
            root = root.right
        return root.val

    def deleteNode(self, root: TreeNode, key: int) -> TreeNode:
        """Delete ``key`` from the subtree at ``root`` and return its new root.

        Returns None when the subtree is (or becomes) empty. A missing key
        leaves the tree unchanged.
        """
        if not root:
            return None

        # delete from the right subtree
        if key > root.val:
            root.right = self.deleteNode(root.right, key)
        # delete from the left subtree
        elif key < root.val:
            root.left = self.deleteNode(root.left, key)
        # delete the current node
        else:
            # the node is a leaf
            if not (root.left or root.right):
                root = None
            # the node is not a leaf and has a right child:
            # overwrite it with its successor, then delete that successor
            elif root.right:
                root.val = self.successor(root)
                root.right = self.deleteNode(root.right, root.val)
            # the node is not a leaf, has no right child, and has a left child:
            # overwrite it with its predecessor, then delete that predecessor
            else:
                root.val = self.predecessor(root)
                root.left = self.deleteNode(root.left, root.val)

        return root

In [3]:
class Bucket_LL:
    """Bucket backed by a singly linked list that starts with a sentinel node."""

    def __init__(self):
        # Sentinel head: it never holds data, so insert/delete need no
        # special case for an empty list.
        self.head = Node(0)

    def _nodes(self):
        """Yield every data node, in order, skipping the sentinel."""
        node = self.head.next
        while node is not None:
            yield node
            node = node.next

    def insert(self, newValue):
        """Put ``newValue`` at the front of the list unless it is already stored."""
        if self.exists(newValue):
            return
        # The new node sits right behind the sentinel, ahead of the old first node.
        self.head.next = Node(newValue, self.head.next)

    def delete(self, value):
        """Unlink the first node holding ``value``; do nothing if there is none."""
        before = self.head
        while before.next is not None:
            if before.next.value == value:
                # Bypass the matching node.
                before.next = before.next.next
                return
            before = before.next

    def exists(self, value):
        """Return True if some node holds ``value``, else False."""
        return any(node.value == value for node in self._nodes())

In [4]:
class MyHashSet:
    """Hash set using separate chaining over a fixed-size array of buckets.

    Keys are mapped to a bucket with ``key % keyRange``; the bucket object
    does the actual storage through its ``insert`` / ``delete`` / ``exists``
    methods.
    """

    def __init__(self, bucket_algo: str = 'BST'):
        # 'BST' selects tree buckets; any other value selects linked-list buckets.
        self.bucket_algo = Bucket_BSTree if bucket_algo == 'BST' else Bucket_LL

        self.keyRange = 769
        self.bucketArray = [self.bucket_algo() for _ in range(self.keyRange)]

    def _hash(self, key):
        """Return the bucket index for ``key``."""
        return key % self.keyRange

    def _bucket_for(self, key):
        """Return the bucket object responsible for ``key``."""
        return self.bucketArray[self._hash(key)]

    def add(self, key):
        """Insert ``key`` into the set."""
        self._bucket_for(key).insert(key)

    def remove(self, key):
        """Delete ``key`` from the set."""
        self._bucket_for(key).delete(key)

    def contains(self, key):
        """Return whatever the bucket's ``exists`` reports for ``key``."""
        return self._bucket_for(key).exists(key)

### Hash Map
-

In [None]:
class Bucket:
    """One hash-map slot: a list of ``(key, value)`` pairs searched linearly."""

    def __init__(self):
        # List of (key, value) tuples; update() keeps at most one per key.
        self.bucket = []

    def get(self, key):
        """Return the value stored under ``key``, or -1 if the key is absent."""
        for k, v in self.bucket:
            if k == key:
                return v
        return -1

    def update(self, key, value):
        """Overwrite the value for ``key``, or append a new pair if absent."""
        for i, (k, _) in enumerate(self.bucket):
            if k == key:
                self.bucket[i] = (key, value)
                return

        self.bucket.append((key, value))

    def remove(self, key):
        """Delete the pair stored under ``key``; do nothing if absent."""
        for i, (k, _) in enumerate(self.bucket):
            if k == key:
                del self.bucket[i]
                # Stop immediately: continuing to iterate a list that was
                # just shortened would skip the element after the deleted one.
                return


class MyHashMap:
    """Hash map using separate chaining: ``key % key_space`` selects a Bucket."""

    def __init__(self):
        self.key_space = 2069
        self.hash_table = [Bucket() for _ in range(self.key_space)]

    def _bucket_for(self, key):
        """Return the Bucket that ``key`` hashes to."""
        return self.hash_table[key % self.key_space]

    def put(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self._bucket_for(key).update(key, value)

    def get(self, key):
        """Return the value for ``key``, or the bucket's -1 marker if absent."""
        return self._bucket_for(key).get(key)

    def remove(self, key):
        """Delete the entry for ``key`` if there is one."""
        self._bucket_for(key).remove(key)

## Practical Applications
### Hash Set
- Remember: a hash set is used to store unique (non-repeated) values!

In [None]:
# Find the Single Number
## Only works when exactly one value appears an odd number of times
def single_num(nums: list[int]) -> int:
    """Return the XOR of every element of ``nums`` (0 for an empty list).

    Equal values cancel under XOR, so when every value but one appears an
    even number of times the result is that one remaining value.
    """
    acc = 0
    for value in nums:
        acc ^= value
    return acc

### Hash Map
- Needs more info than the key alone; builds a relationship between key and info
- Some problems require one to aggregate info under a key

In [None]:
# Two Sum
## With an array and a target, return the indices of the two nums that add up to the target
## Assume only one solution
def two_sum(nums: list[int], target: int) -> list[int]:
    """Return ``[j, i]`` with ``nums[i] + nums[j] == target`` and ``i < j``.

    The later index comes first in the result. Returns an empty list when
    no pair adds up to ``target``.
    """
    # value -> index of the most recent position where that value was seen
    seen = {}

    for idx, num in enumerate(nums):
        needed = target - num

        if needed in seen:
            return [idx, seen[needed]]

        seen[num] = idx

    return []

In [6]:
# Is Isomorphic
## Need to account for s -> t transitions AND index placement
def freq_map_idx(nums):
    """Map each distinct element of ``nums`` to ``[count, [indices]]``.

    ``count`` is how many times the element occurs and ``indices`` lists
    the positions where it occurs, in increasing order. Keys appear in
    first-occurrence order.
    """
    # Key: list[count, list[indices]]
    f = {}

    for idx, num in enumerate(nums):
        # setdefault creates the entry on first sight, with no exception
        # handling needed for the "key not present yet" case.
        entry = f.setdefault(num, [0, []])
        entry[0] += 1
        entry[1].append(idx)

    return f

def isIsomorphic(s: str, t: str) -> bool:
    """Return True if ``s`` and ``t`` share the same character-position pattern.

    Each string is reduced to its ``[count, [indices]]`` groups; the strings
    match when the sorted groups are equal.
    """
    groups_s = sorted(freq_map_idx(s).values())
    groups_t = sorted(freq_map_idx(t).values())
    return groups_s == groups_t

In [8]:
# Check if N's double exists
def checkIfExist(arr: list[int]) -> bool:
    """Return True if some element of ``arr`` is twice another element.

    The two elements must sit at different positions, so a lone 0 does not
    count as its own double; two zeros do.
    """
    # value -> number of occurrences
    occurrences = {}
    for value in arr:
        occurrences[value] = occurrences.get(value, 0) + 1

    def has_double(value):
        # 0 doubles to itself, so it needs a second 0 elsewhere in arr.
        if value == 0:
            return occurrences[value] > 1
        return 2 * value in occurrences

    return any(has_double(value) for value in arr)

### Design the Key

In [11]:
# Group Anagrams
## Solution 1
class Solution:
    """Group anagrams using a 26-slot letter-count tuple as the hash key."""

    def groupAnagrams(self, strs: list[str]) -> list[list[str]]:
        """Return the words of ``strs`` grouped by identical letter counts.

        Groups appear in order of their first word, and words keep their
        input order inside a group. Letters are indexed relative to "a".
        """
        base = ord("a")
        groups = defaultdict(list)

        for word in strs:
            # One counter per letter; the tuple of counters is the key.
            letter_counts = [0] * 26
            for ch in word:
                letter_counts[ord(ch) - base] += 1

            groups[tuple(letter_counts)].append(word)

        return [*groups.values()]

## Solution 2 -> using a frozenset of the (character, count) pairs as a key
class Solution:
    """Group anagrams by the frozenset of their ``(character, count)`` pairs."""

    def freq_map(self, nums):
        """Return a hashable frequency signature of ``nums``.

        The result is a frozenset of ``(element, count)`` pairs, so two
        inputs with the same elements in the same quantities produce equal
        signatures regardless of order.
        """
        f = {}

        for num in nums:
            f[num] = f.get(num, 0) + 1

        # A dict is unhashable; a frozenset of its items can be a dict key.
        return frozenset(f.items())

    def groupAnagrams(self, strs: list[str]) -> list[list[str]]:
        """Return the words of ``strs`` grouped by equal frequency signature.

        Groups appear in order of their first word, and words keep their
        input order inside a group.
        """
        anagrams = {}

        for word in strs:
            anagrams.setdefault(self.freq_map(word), []).append(word)

        return list(anagrams.values())