# DSA Hash Tables

In [None]:
#A Hash Table is a data structure designed to be fast to work with.
# A reason Hash Tables are sometimes preferred over arrays or linked lists is that searching for, adding, and deleting data can be done really
# quickly, even for large amounts of data.

# Building A Hash Table from Scratch
# We will build the Hash Set in 5 steps:

# 1. Starting with an array
# 2. Storing names using a hash function
# 3. Looking up an element using a hash function
# 4. Handling collisions
# 5. The basic Hash Set code example and simulation 

### Step1: Starting with an array

In [1]:
# Using a plain array (a Python list), we could store names like this:

my_array = ['Djim', 'Emma', 'Moussa', 'Michel', 'James']

# To find "Michel" in this array, we need to compare each name, element by element, until we find "Michel".
# The more names the array holds, the more comparisons a lookup may need.


### Step2: Storing names using a hash function

In [6]:
# Now comes the special way we interact with the Hash Set we are making.
# We want to store a name directly into its right place in the array, and this is where the hash function comes in.
def hash_function(value):
    """Return the hash code (0-9) of a string.

    The hash code is the sum of the Unicode code points of every character
    in ``value``, reduced modulo 10 so it can be used as an index into a
    10-slot array.
    """
    code_point_total = sum(map(ord, value))
    return code_point_total % 10
# Demo: show which of the 10 array slots 'Michel' is mapped to.
print("'Michel' has hash code:", hash_function('Michel'))

'Michel' has hash code: 4


### Step3: Looking up a name using a hash function

In [7]:
# We have now established a super basic Hash Set: we no longer have to check the array element by element to find
# out whether 'Pete' is in there, we can just use the hash function to go straight to the right element.

# Each name below sits at the index given by its hash code (for example 'Pete' is at index 8),
# and None marks a slot that holds no name.
my_hash_set = [None, 'Jones', None, 'Lisa', None, 'Bob', None, 'Siri', 'Pete', None] 

def hash_function(value):
    """Map a string to a slot index between 0 and 9.

    Adds up ``ord()`` of each character in ``value`` and returns that total
    modulo 10.
    """
    total = sum(ord(symbol) for symbol in value)
    return total % 10

def contains(name):
    """Report whether ``name`` is stored in ``my_hash_set``.

    Only the single slot selected by ``hash_function(name)`` is inspected, so
    the answer is True only when that slot holds exactly ``name``.
    """
    stored = my_hash_set[hash_function(name)]
    return stored == name

# Demo: look 'Pete' up through the hash function instead of scanning the array.
print(" 'Pete' is in the Hash Set:", contains('Pete'))

 'Pete' is in the Hash Set: True


### Step4: Handling collisions

In [9]:
# To handle collisions, every slot of the array is now a bucket: a list that can hold more than one name.
# 'Lisa' and 'Stuart' both have hash code 3, so they share bucket 3. Buckets without any name are written as [None] here.
my_hash_set = [
    [None],
    ['Jones'],
    [None], 
    ['Lisa', 'Stuart'],
    [None], 
    ['Bob'],
    [None],
    ['Siri'],
    ['Pete'],
    [None]
]

# Searching for "Stuart" in our Hash Set now means that using the hash function we end up directly in bucket 3, but then we must first check "Lisa"
# in that bucket, before we find "Stuart" as the second element in bucket 3.


### Step5: Hash Set code example and simulation

In [12]:
# To complete our very basic Hash Set code, let's have functions for adding and searching for names in the Hash Set, which is now a two-dimensional array.

# Ten buckets, one per possible hash code (0-9). Buckets with no name are written as [None];
# note that add() below appends to the bucket without removing that None placeholder.
my_hash_set = [
    [None],
    ['Jones'],
    [None],
    ['Lisa'],
    [None],
    ['Bob'],
    [None],
    ['Siri'],
    ['Pete'],
    [None]
]
def hash_function(value):
    """Return the bucket index (0-9) for ``value``.

    The index is the sum of the code points of the characters in ``value``,
    modulo 10.
    """
    running_total = 0
    for char in value:
        running_total += ord(char)
    return running_total % 10

def add(value):
    """Store ``value`` in ``my_hash_set`` unless its bucket already holds it."""
    bucket = my_hash_set[hash_function(value)]
    if value in bucket:
        return  # already present; keep the set free of duplicates
    bucket.append(value)

def contains(value):
    """Return True when ``value`` is found in the bucket its hash code selects."""
    return value in my_hash_set[hash_function(value)]

# 'Stuart' hashes to bucket 3, which already holds 'Lisa', so it is appended to that bucket.
add('Stuart')

# Show all ten buckets, then confirm the new name can be found again.
print(my_hash_set)
print('Contains Stuart:', contains('Stuart'))

[[None], ['Jones'], [None], ['Lisa', 'Stuart'], [None], ['Bob'], [None], ['Siri'], ['Pete'], [None]]
Contains Stuart: True


# DSA Hash Sets

In [None]:
# A Hash Set is a form of Hash Table data structure that usually holds a large number of elements. Using a Hash Set we can search, add, and remove elements
# really fast. Hash Sets are used for lookup, to check if an element is part of a set. A Hash Set stores unique elements in buckets according to the 
# element's hash code.


# Hash code: A number generated from an element's unique value (key), to determine what bucket that Hash Set element belongs to.
# Unique elements: A Hash Set cannot have more than one element with the same value.
# Bucket: A Hash Set consists of many such buckets, or containers, to store elements. If two elements have the same hash code, they belong to the same bucket.
# The buckets are therefore often implemented as arrays or linked lists, because a bucket needs to be able to hold more than one element.

# A hash code is generated by a hash function.

## Hash Set Implementation

In [13]:
class SimpleHashSet:
    """A minimal hash set for strings that resolves collisions by chaining.

    Values are distributed over ``size`` buckets. Each bucket is a plain list,
    so several values with the same hash code can live side by side.
    """

    def __init__(self, size=100):
        """Create an empty set with ``size`` buckets.

        Args:
            size: Number of buckets; must be at least 1.

        Raises:
            ValueError: If ``size`` is smaller than 1. Without this check a
                size of 0 only fails later with ZeroDivisionError inside
                ``hash_function``, and a negative size fails with IndexError
                on the first ``add``.
        """
        if size < 1:
            raise ValueError("size must be a positive integer")
        self.size = size
        # A list of buckets, each is a list (to handle collisions)
        self.buckets = [[] for _ in range(size)]

    def hash_function(self, value):
        """Return the bucket index for ``value``.

        Simple hash function: sum of character codes modulo the number of
        buckets. ``value`` is iterated character by character, so it is
        expected to be a string.
        """
        return sum(ord(char) for char in value) % self.size

    def add(self, value):
        """Add ``value`` if it is not already present."""
        bucket = self.buckets[self.hash_function(value)]
        if value not in bucket:
            bucket.append(value)

    def contains(self, value):
        """Return True if ``value`` exists in the set, otherwise False."""
        bucket = self.buckets[self.hash_function(value)]
        return value in bucket

    def remove(self, value):
        """Remove ``value`` from the set; do nothing if it is not stored."""
        bucket = self.buckets[self.hash_function(value)]
        if value in bucket:
            bucket.remove(value)

    def print_set(self):
        """Print every bucket, including empty ones, with its index."""
        print("Hash Set contains:")
        for index, bucket in enumerate(self.buckets):
            print(f"Bucket {index}: {bucket}")

# Build the Hash Set from the simulation: ten buckets, names added in this order
hash_set = SimpleHashSet(size=10)

for name in ("Charlotte", "Thomas", "Jens", "Peter", "Lisa", "Adele", "Michaela", "Bob"):
    hash_set.add(name)

hash_set.print_set()

# Look 'Peter' up, remove him, then look him up again
print("\n'Peter' is in the set:", hash_set.contains('Peter'))
print("Removing 'Peter'")
hash_set.remove('Peter')
print("'Peter' is in the set:", hash_set.contains('Peter'))

# The hash code is the index of the bucket the name is stored in
print("'Adele' has hash code:", hash_set.hash_function('Adele'))

Hash Set contains:
Bucket 0: ['Thomas', 'Jens']
Bucket 1: []
Bucket 2: ['Peter']
Bucket 3: ['Lisa']
Bucket 4: ['Charlotte']
Bucket 5: ['Adele', 'Bob']
Bucket 6: []
Bucket 7: []
Bucket 8: ['Michaela']
Bucket 9: []

'Peter' is in the set: True
Removing 'Peter'
'Peter' is in the set: False
'Adele' has hash code: 5


# DSA Hash Maps

In [14]:
# A Hash Map is a form of Hash Table data structure that usually holds a large number of entries. Using a Hash Map we can search, add, modify, and remove 
# entries really fast. Hash Maps are used to find detailed information about something. 


# Entry: Consists of a key and a value, forming a key-value pair.
# Key: Unique for each entry in the Hash Map. Used to generate a hash code determining the entry's bucket in the Hash Map.
#  This ensures that every entry can be efficiently located.
# Hash Code: A number generated from an entry's key, to determine what bucket that Hash Map entry belongs to.
# Bucket: A Hash Map consists of many such buckets, or containers, to store entries.
# Value: Can be nearly any kind of information, like name, birth date, and address of a person. The value can be many different kinds of information combined.


### Finding The Hash Code.

In [None]:
# A hash code is generated by a hash function. 

# The hash function in the simulation above takes the digits of the social security number (not the dash), adds them together, and does a modulo 10
# operation (% 10) on the sum of the digits to get the hash code as a number from 0 to 9.

### Hash Map Implementation

In [15]:
class SimpleHashMap:
    """A minimal hash map with string keys that resolves collisions by chaining.

    Entries are ``(key, value)`` tuples distributed over ``size`` buckets.
    Each bucket is a plain list, so several entries with the same hash code
    can live side by side.
    """

    def __init__(self, size=100):
        """Create an empty map with ``size`` buckets.

        Args:
            size: Number of buckets; must be at least 1.

        Raises:
            ValueError: If ``size`` is smaller than 1, since no key could be
                mapped to a bucket.
        """
        if size < 1:
            raise ValueError("size must be a positive integer")
        self.size = size
        # A list of buckets, each is a list (to handle collisions)
        self.buckets = [[] for _ in range(size)]

    def hash_function(self, key):
        """Return the bucket index for ``key``.

        Sums only the decimal digits of the key, ignoring every other
        character (such as the dash), and reduces the sum modulo the number
        of buckets. A key without any digit maps to bucket 0.
        """
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() cannot convert.
        numeric_sum = sum(int(char) for char in key if char.isdecimal())
        # Modulo the real bucket count (not a fixed 10) so the index is always
        # valid and every bucket can be used, whatever size was requested.
        return numeric_sum % self.size

    def put(self, key, value):
        """Add a key-value pair, or replace the value if ``key`` already exists."""
        bucket = self.buckets[self.hash_function(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)  # update existing key
                return
        bucket.append((key, value))  # add a new key-value pair if not found

    def get(self, key):
        """Return the value stored for ``key``, or None if the key is not found.

        A stored value of None cannot be told apart from a missing key.
        """
        bucket = self.buckets[self.hash_function(key)]
        for k, v in bucket:
            if k == key:
                return v
        return None  # key not found

    def remove(self, key):
        """Remove the entry for ``key``; do nothing if the key is not stored."""
        bucket = self.buckets[self.hash_function(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                del bucket[i]  # remove the key-value pair
                return

    def print_map(self):
        """Print every bucket, including empty ones, with its index."""
        print("Hash map contents:")
        for index, bucket in enumerate(self.buckets):
            print(f"Bucket {index}: {bucket}")

# Build the Hash Map from the simulation: ten buckets
hash_map = SimpleHashMap(size=10)

# Add some entries, in this order, as (key, name) pairs
for number, person in [
    ("123-4567", "Charlotte"),
    ("123-4568", "Thomas"),
    ("123-4569", "Jens"),
    ("123-4570", "Peter"),
    ("123-4571", "Lisa"),
    ("123-4672", "Adele"),
    ("123-4573", "Michaela"),
    ("123-6574", "Bob"),
]:
    hash_map.put(number, person)

hash_map.print_map()

# Demonstrate retrieval
print("\nName associated with '123-4570':", hash_map.get('123-4570'))

# Putting an existing key again replaces its value
print("Updating the name for '123-4570' to 'James'")
hash_map.put("123-4570", "James")

# Check whether Peter is still there
print("Name associated with '123-4570':", hash_map.get("123-4570"))

Hash map contents:
Bucket 0: [('123-4569', 'Jens')]
Bucket 1: []
Bucket 2: [('123-4570', 'Peter')]
Bucket 3: [('123-4571', 'Lisa')]
Bucket 4: []
Bucket 5: [('123-4672', 'Adele'), ('123-4573', 'Michaela')]
Bucket 6: []
Bucket 7: []
Bucket 8: [('123-4567', 'Charlotte'), ('123-6574', 'Bob')]
Bucket 9: [('123-4568', 'Thomas')]

Name associated with '123-4570': Peter
Updating the name for '123-4570' to 'James'
Name associated with '123-4570': James
