# Recommendation System Project 
AD315 - Project 2

## Imports

In [2]:
import csv
import math
import time
from typing import List, Any, Optional

def preview_csv(path="user_item_data.csv", n=5):
    """Print the first ``n`` data rows of a CSV file, skipping the header row.

    Args:
        path: Location of the CSV file to preview.
        n: Maximum number of data rows to print (default 5). Files with
            fewer rows are printed in full instead of raising.

    Returns:
        None. Each row is printed as the list of strings produced by
        ``csv.reader``.
    """
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(path, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip header; the default keeps an empty file from raising
        # zip stops at whichever runs out first: n rows or the end of the file.
        for _, row in zip(range(n), reader):
            print(row)

# Sanity check: print the first few rows of the default CSV file.
preview_csv()


['user6', 'Ghost of Tsushima']
['user4', 'Fallout']
['user6', 'Bloodborne']
['user1', 'League of Legends']
['user5', 'Super Mario Bros']


In [None]:
## HashTable (Base Class)
from typing import Any

class HashTable:
    """Hash table mapping a key (e.g. a user ID) to the list of items stored for it.

    Two collision strategies are supported, chosen at construction time:

    * ``"separate_chaining"`` - every slot holds a bucket (list) of
      ``[key, items_list]`` pairs.
    * ``"double_hashing"`` - open addressing over a flat array whose entries
      are ``None`` or ``(key, items_list)``; ``second_hash`` gives the probe step.

    Attributes:
        size: Number of slots in the table.
        collision_avoidance: Name of the active strategy.
        table: Underlying storage (layout depends on the strategy, see above).
        collisions: Collision counter. For both strategies it only grows when
            a *new* key is stored: by 1 if the key lands in a non-empty bucket
            (chaining), or by the number of occupied slots probed before the
            key is placed (double hashing). Adding an item to an existing key
            never changes it, so the two strategies can be compared fairly.

    Note:
        Slot positions come from the built-in ``hash``. For ``str`` keys that
        value is salted per interpreter process unless ``PYTHONHASHSEED`` is
        set, so table layout and collision counts may differ between runs.
    """

    _STRATEGIES = ("separate_chaining", "double_hashing")

    def __init__(self, size: int, collision_avoidance: str = "separate_chaining"):
        """Create an empty table.

        Args:
            size: Number of slots; must be at least 1.
            collision_avoidance: ``"separate_chaining"`` (default) or
                ``"double_hashing"``.

        Raises:
            ValueError: If ``size`` is smaller than 1 or the strategy name is
                not recognised (previously such tables silently ignored
                every insert).
        """
        if size < 1:
            raise ValueError("size must be at least 1, got {!r}".format(size))
        if collision_avoidance not in self._STRATEGIES:
            raise ValueError(
                "Unknown collision_avoidance {!r}; expected one of {}".format(
                    collision_avoidance, self._STRATEGIES
                )
            )

        self.size = size
        self.collision_avoidance = collision_avoidance

        if collision_avoidance == "separate_chaining":
            # Each slot is a list (bucket) of [key, items_list] pairs.
            self.table = [[] for _ in range(size)]
        else:
            # Double hashing: flat array of entries.
            self.table = [None] * size

        # Stats
        self.collisions = 0

    def hash(self, key: Any) -> int:
        """Return the home slot index of ``key`` (in ``range(self.size)``)."""
        return hash(key) % self.size

    def second_hash(self, key: Any) -> int:
        """Return the probe step used by double hashing for ``key``.

        The step is always in ``1 .. max(1, size - 1)`` and coprime with
        ``size``, so the probe sequence visits every slot exactly once before
        repeating. For prime sizes this is identical to the plain
        ``1 + hash(key) % (size - 1)`` formula.
        """
        if self.size < 2:
            # A single-slot table has only one possible probe; avoids "% 0".
            return 1
        step = 1 + (hash(key) % (self.size - 1))
        # Bump the step until it shares no factor with size. This terminates
        # at size - 1 at the latest, because gcd(size - 1, size) == 1.
        while math.gcd(step, self.size) != 1:
            step += 1
        return step

    def _probe_sequence(self, key: Any):
        """Yield the ``size`` slot indices double hashing inspects for ``key``."""
        index = self.hash(key)
        step = self.second_hash(key)
        for _ in range(self.size):
            yield index
            index = (index + step) % self.size

    def insert(self, key: Any, item: Any):
        """Record ``item`` for ``key``; duplicate items for a key are ignored.

        Args:
            key: User ID (e.g. "user42").
            item: Single item (e.g. "item23").

        Raises:
            RuntimeError: In double-hashing mode, when every slot is occupied
                by a different key.
        """
        # Separate chaining
        if self.collision_avoidance == "separate_chaining":
            bucket = self.table[self.hash(key)]

            # Look for an existing key in the bucket.
            for stored_key, items_list in bucket:
                if stored_key == key:
                    if item not in items_list:
                        items_list.append(item)
                    return

            # Key not found: a non-empty bucket means this new key collided.
            if bucket:
                self.collisions += 1
            bucket.append([key, [item]])
            return

        # Double hashing
        occupied_probes = 0
        for index in self._probe_sequence(key):
            entry = self.table[index]

            # Empty slot --> create new entry and account for the probes it cost.
            if entry is None:
                self.table[index] = (key, [item])
                self.collisions += occupied_probes
                return

            # Same key --> append item if new (the list is mutated in place).
            if entry[0] == key:
                if item not in entry[1]:
                    entry[1].append(item)
                return

            # Slot taken by another key --> keep probing.
            occupied_probes += 1

        raise RuntimeError("Hash table is full")

    def retrieve(self, key: Any):
        """Return the list of items stored for ``key``, or ``None`` if absent.

        The returned list is the table's own list object, not a copy.
        """
        if self.collision_avoidance == "separate_chaining":
            for stored_key, items_list in self.table[self.hash(key)]:
                if stored_key == key:
                    return items_list
            return None

        # Double hashing: follow the same probe sequence insert() used.
        for index in self._probe_sequence(key):
            entry = self.table[index]
            if entry is None:
                return None  # an empty slot ends the chain: key not found
            if entry[0] == key:
                return entry[1]
        return None
                

# Smoke test: separate chaining. user1 gets two items, user2 gets one.
ht_chain = HashTable(10, collision_avoidance="separate_chaining")
for demo_user, demo_item in (
    ("user1", "apple"),
    ("user1", "banana"),
    ("user2", "carrot"),
):
    ht_chain.insert(demo_user, demo_item)

print("SC user1:", ht_chain.retrieve("user1"))
print("SC user2:", ht_chain.retrieve("user2"))
print("SC collisions:", ht_chain.collisions)
print("SC table:", ht_chain.table)

# Smoke test: double hashing on a prime-sized table (11 slots).
ht_double = HashTable(11, collision_avoidance="double_hashing")
for demo_user, demo_item in (
    ("user1", "apple"),
    ("user1", "banana"),
    ("user2", "carrot"),
    ("user3", "donut"),
):
    ht_double.insert(demo_user, demo_item)

print("DH user1:", ht_double.retrieve("user1"))
print("DH user2:", ht_double.retrieve("user2"))
print("DH user3:", ht_double.retrieve("user3"))
print("DH collisions:", ht_double.collisions)
print("DH table:", ht_double.table)

        
    

AttributeError: 'HashTable' object has no attribute 'insert'

## HashTable (Separate Chaining)

## HashTable (Double Hashing)

In [4]:
## MaxHeap

class MaxHeap:
    """Array-backed binary max-heap of ``(priority, item)`` tuples.

    Ordering looks only at the priority (element 0 of each tuple); the entry
    with the largest priority sits at ``data[0]``.
    """

    def __init__(self):
        # Heap storage: list of (priority, item) tuples in level order.
        self.data = []

    def parent(self, index):
        """Index of the parent of ``index``."""
        return (index - 1) // 2

    def left_child(self, index):
        """Index where the left child of ``index`` would live."""
        return 2 * index + 1

    def right_child(self, index):
        """Index where the right child of ``index`` would live."""
        return 2 * index + 2

    def has_left(self, index):
        """True when ``index`` has a left child inside the array."""
        return self.left_child(index) < len(self.data)

    def has_right(self, index):
        """True when ``index`` has a right child inside the array."""
        return self.right_child(index) < len(self.data)

    def swap(self, i, j):
        """Exchange the entries stored at positions ``i`` and ``j``."""
        entries = self.data
        entries[i], entries[j] = entries[j], entries[i]

    def percolate_up(self, index):
        """Move the entry at ``index`` up while it strictly beats its parent."""
        entries = self.data
        while index > 0:
            above = self.parent(index)
            if not entries[index][0] > entries[above][0]:
                return
            self.swap(index, above)
            index = above

    def percolate_down(self, index):
        """Move the entry at ``index`` down while a child strictly beats it."""
        entries = self.data
        while self.has_left(index):
            left = self.left_child(index)
            right = self.right_child(index)
            # Prefer the left child unless the right one exists and is strictly larger.
            if self.has_right(index) and entries[right][0] > entries[left][0]:
                bigger = right
            else:
                bigger = left

            if not entries[index][0] < entries[bigger][0]:
                return
            self.swap(index, bigger)
            index = bigger

    def push(self, priority, item):
        """Add ``item`` with the given ``priority``."""
        self.data.append((priority, item))
        newest = len(self.data) - 1
        self.percolate_up(newest)

    def pop(self):
        """Remove and return the ``(priority, item)`` root, or ``None`` if empty."""
        if not self.data:
            return None
        # Move the root to the end so it can be removed cheaply, then repair the heap.
        last = len(self.data) - 1
        self.swap(0, last)
        removed = self.data.pop()
        self.percolate_down(0)
        return removed

    def top_n(self, n):
        """Return up to ``n`` highest-priority entries without modifying this heap."""
        scratch = MaxHeap()
        scratch.data = list(self.data)  # pop from a shallow copy, not the original
        how_many = min(n, len(self.data))
        return [scratch.pop() for _ in range(how_many)]
    
# Smoke test for MaxHeap: push four scored items, then inspect and pop.
heap = MaxHeap()

for demo_priority, demo_label in (
    (0.5, "itemA"),
    (0.9, "itemB"),
    (0.7, "itemC"),
    (0.2, "itemD"),
):
    heap.push(demo_priority, demo_label)

print("Heap internal array:", heap.data)
print("Top 2:", heap.top_n(2))  # non-destructive peek at the two best

# Two destructive pops; the two lowest-priority items should remain.
print("Pop:", heap.pop())
print("Pop:", heap.pop())
print("Remaining heap:", heap.data)


Heap internal array: [(0.9, 'itemB'), (0.5, 'itemA'), (0.7, 'itemC'), (0.2, 'itemD')]
Top 2: [(0.9, 'itemB'), (0.7, 'itemC')]
Pop: (0.9, 'itemB')
Pop: (0.7, 'itemC')
Remaining heap: [(0.5, 'itemA'), (0.2, 'itemD')]


## RecommendationSystem 

## Main Driver

## Analysis