# Symbol Tables and BSTs
## Exercise – Linked List vs Ordered Array Symbol Table

We implement the symbol table ADT first using an unordered linked list and then using an ordered array. In the following we only implement basic put/get operations. We compare their performance in supporting search (get) operations as we grow the size of input n.

### Linked List ST

In [1]:
class Node:
    """One cell of a singly linked list, holding a key/value pair."""

    def __init__(self, key, value, nextNode):
        # nextNode is stored as given: the following cell of the chain,
        # or None when this cell is the last one.
        self.key, self.value, self.nextNode = key, value, nextNode

class SequentialSearchST:
    """Symbol table stored as an unordered singly linked list.

    Both get and put scan the chain from ``first``; a key that is not yet
    present is prepended as a new head node.
    """

    def __init__(self):
        self.first = None  # head of the chain, None while the table is empty
        self.n = 0         # number of key/value pairs currently stored

    def _nodes(self):
        """Yield every node of the chain in order, starting at ``first``."""
        cursor = self.first
        while cursor is not None:
            yield cursor
            cursor = cursor.nextNode

    def size(self):
        """Return the number of key/value pairs in the table."""
        return self.n

    def isEmpty(self):
        """Return True when the table holds no pairs."""
        return self.size() == 0

    def get(self, key):
        """Return the value stored under key, or None if key is absent.

        A None key is reported on stdout and also yields None.
        """
        if key is None:
            print("Error - ST cannot contain null keys")
            return None

        for node in self._nodes():
            if key == node.key:
                return node.value
        return None

    def put(self, key, value):
        """Store value under key, overwriting the value of an existing key.

        A None key or None value is reported on stdout and ignored.
        """
        if key is None or value is None:
            print("Error - Cannot enter null key/values in ST")
            return

        # Update in place when the key is already in the chain.
        for node in self._nodes():
            if key == node.key:
                node.value = value
                return

        # New key: it becomes the head of the chain.
        self.first = Node(key, value, self.first)
        self.n += 1

    def debug(self):
        """Print every key/value pair, one per line, in chain order."""
        for node in self._nodes():
            print(node.key, node.value)

### Ordered Array ST

In [2]:
class OrderedArrayST:
    """Fixed-capacity symbol table that keeps its keys in a sorted array.

    ``keys[0:n]`` holds the stored keys in ascending order and ``values[i]``
    is the value paired with ``keys[i]``. Slots at index ``n`` and beyond are
    unused and still hold the ``-1`` placeholder written by ``__init__``.
    Keys must be mutually comparable with ``<`` and ``==``.
    """

    def __init__(self, capacity):
        """Create an empty table able to hold up to ``capacity`` pairs."""
        self.keys = [-1] * capacity
        self.values = [-1] * capacity
        self.n = 0

    def size(self):
        """Return the number of key/value pairs in the table."""
        return self.n

    def isEmpty(self):
        """Return True when the table holds no pairs."""
        return self.n == 0

    def binSearch(self, key, lo, hi):
        """Binary-search ``keys[lo..hi]`` (both ends inclusive) for key.

        Returns the index of key, or -1 when it is not in that range.
        """
        while lo <= hi:
            mid = (lo + hi) // 2
            if key == self.keys[mid]:
                return mid
            if key < self.keys[mid]:
                hi = mid - 1
            else:
                lo = mid + 1
        return -1

    def insertionSort(self, key):
        """Write key at its sorted position and return that index.

        Keys (and their values) that sort after key are shifted one slot to
        the right. The caller must ensure a free slot exists, and is
        responsible for storing the value and incrementing ``n``.
        """
        pos = 0
        # The bounds test comes first so the placeholder in the unused slot
        # keys[n] is never compared with key (which fails for e.g. str keys).
        while pos < self.n and self.keys[pos] < key:
            pos += 1

        # Shift the tail right by one; a no-op when key goes at the end.
        for i in range(self.n, pos, -1):
            self.keys[i] = self.keys[i - 1]
            self.values[i] = self.values[i - 1]

        self.keys[pos] = key
        return pos

    def get(self, key):
        """Return the value stored under key, or None if key is absent.

        A None key is reported on stdout and also yields None.
        """
        if key is None:
            print("Error - ST cannot contain null keys")
            return None

        if self.isEmpty():
            return None

        i = self.binSearch(key, 0, self.n - 1)
        return self.values[i] if i != -1 else None

    def put(self, key, value):
        """Store value under key, overwriting the value of an existing key.

        A None key or None value is reported on stdout and ignored. A new
        key is rejected (with a message on stdout) when the table is full;
        updating an existing key always succeeds.
        """
        if key is None or value is None:
            print("Error- ST cannot contain null keys")
            return

        i = self.binSearch(key, 0, self.n - 1)
        if i != -1:
            # Existing key: no extra slot is needed, so capacity is irrelevant.
            self.values[i] = value
            return

        if len(self.keys) == self.n:
            print("Error - ST is full")
            return

        i = self.insertionSort(key)
        self.values[i] = value
        self.n += 1

    def debug(self):
        """Print every key/value pair, one per line, in key order."""
        for i in range(self.n):
            print(self.keys[i], self.values[i])

In [3]:
# driver code: time put/get on both symbol tables for growing input sizes
import timeit
import random

size = [5000, 10000, 20000, 40000, 80000]

# times[row][col] holds a run time in seconds, rounded to 3 decimals;
# col is the position of the input size in `size`, row is the operation:
# 0 / 1 : put / get SSST
# 2 / 3 : put / get OAST
times = [[0] * len(size) for _ in range(4)]


# random numbers between min_value and max_value to use as keys in ST
# (both ends inclusive, so there are at most max_value - min_value + 1
# distinct keys regardless of how many are drawn)
min_value = 0
max_value = 5000

for i, count in enumerate(size):

    randomListOfKeys = [random.randint(min_value, max_value) for _ in range(count)]

    # put: linked-list table
    ssst = SequentialSearchST()
    starttime = timeit.default_timer()
    for key in randomListOfKeys:
        ssst.put(key, key)
    endtime = timeit.default_timer()
    times[0][i] = round(endtime - starttime, 3)

    # put: ordered-array table, sized to fit every key drawn
    oast = OrderedArrayST(count)
    starttime = timeit.default_timer()
    for key in randomListOfKeys:
        oast.put(key, key)
    endtime = timeit.default_timer()
    times[2][i] = round(endtime - starttime, 3)

    randomListOfSearchKeys = [random.randint(min_value, max_value) for _ in range(count)]

    # get: linked-list table
    starttime = timeit.default_timer()
    for key in randomListOfSearchKeys:
        ssst.get(key)
    endtime = timeit.default_timer()
    times[1][i] = round(endtime - starttime, 3)

    # get: ordered-array table
    starttime = timeit.default_timer()
    for key in randomListOfSearchKeys:
        oast.get(key)
    endtime = timeit.default_timer()
    times[3][i] = round(endtime - starttime, 3)

print("Size n :", size)
print("SSST put :", times[0])
print("OAST put :", times[2])
print("SSST get :", times[1])
print("OAST get :", times[3])



Size n : [5000, 10000, 20000, 40000, 80000]
SSST put : [0.757, 1.878, 4.47, 9.521, 20.499]
OAST put : [1.229, 2.146, 2.775, 3.045, 4.108]
SSST get : [1.079, 2.416, 5.023, 10.964, 23.198]
OAST get : [0.025, 0.049, 0.098, 0.227, 0.397]


## Exercise – Is a Binary Tree a Binary Search Tree?
We begin by implementing the BST data structure seen in lectures, where each node in the tree has a key, a value, and a pair of left/right child nodes. We then implement an algorithm that, given a node as argument, determines whether it is the root of a BST, returning True if so and False otherwise. 



In [4]:
class BSTNode:
    """Node of a binary search tree; each node is the root of its own subtree.

    Keys smaller than ``key`` are placed in the ``left`` subtree and keys
    larger than ``key`` in the ``right`` subtree. Keys must be mutually
    comparable with ``<``, ``>`` and ``==``.
    """

    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.left = None
        self.right = None

    def get(self, key):
        """Return the value stored under key in this subtree, or None.

        The descent is iterative, so the depth of the tree is not limited by
        the interpreter's recursion limit (a tree built from sorted keys is
        as deep as it has nodes).
        """
        node = self
        while node is not None:
            if node.key == key:
                return node.value
            if key < node.key:
                node = node.left
            elif key > node.key:
                node = node.right
            else:
                # Key is neither equal, smaller nor larger: treat as absent.
                return None
        return None

    def put(self, key, value):
        """Store value under key in this subtree.

        An existing key has its value overwritten; a new key is attached as
        a leaf at the position the search for it falls off the tree.
        """
        node = self
        while True:
            if key == node.key:
                node.value = value
                return
            if key < node.key:
                if node.left is None:
                    node.left = BSTNode(key, value)
                    return
                node = node.left
            elif key > node.key:
                if node.right is None:
                    node.right = BSTNode(key, value)
                    return
                node = node.right
            else:
                # Key is neither equal, smaller nor larger: nothing to do.
                return
        
           
def isBST(node):
    """Return True if node is the root of a binary search tree.

    Every key must lie strictly between the bounds imposed by its ancestors:
    keys in a left subtree are smaller than the ancestor's key, keys in a
    right subtree are larger, and equal keys are rejected. An empty tree
    (node is None) counts as a BST.

    The tree is walked with an explicit stack rather than recursion, so a
    degenerate (list-shaped) tree deeper than the interpreter's recursion
    limit can still be checked.
    """
    # Each entry is (subtree root, exclusive lower bound, exclusive upper
    # bound); a bound of None means "unbounded on that side".
    pending = [(node, None, None)]
    while pending:
        current, minValue, maxValue = pending.pop()
        if current is None:
            continue
        if minValue is not None and current.key <= minValue:
            return False
        if maxValue is not None and current.key >= maxValue:
            return False
        # Children inherit the parent's bounds, tightened by the parent key.
        pending.append((current.right, current.key, maxValue))
        pending.append((current.left, minValue, current.key))
    return True
            
            

In [5]:
# driver code: build a BST from random keys and validate it with isBST
import random

# random numbers between min_value and max_value to use as keys in ST
min_value = 0
max_value = 500
size = 50

randomListOfKeys = [random.randint(min_value, max_value) for _ in range(size)]

# The first key becomes the root; each value is the key's position in the list.
BSTroot = BSTNode(randomListOfKeys[0], 0)

for i, key in enumerate(randomListOfKeys[1:], start=1):
    BSTroot.put(key, i)


# Check the whole tree, then each of the root's two subtrees.
for subtree in (BSTroot, BSTroot.left, BSTroot.right):
    print(isBST(subtree))

True
True
True


## Exercise – Interval Search in a BST
We implement an algorithm to support <tt>intervalSearch(from, to)</tt> that combines ideas from BST search and in-order traversal. In particular: 
1. binary search for <tt>from</tt> in the BST
2. if the key is smaller than (or equal to) <tt>from</tt>, recursively search in the right subtree (all keys in the left subtree will be smaller than the search interval)
3. if the key is bigger than <tt>from</tt>, first recursively search in the left subtree. When done, check if the key is also smaller than <tt>to</tt>. If so, add it to the output and also recursively visit the right subtree.
We use the definition of <tt>BSTNode</tt> provided above

In [6]:
def intervalSearch(node, a, b, interval):
    """Append to interval the keys k of the subtree at node with a < k < b.

    Both bounds are exclusive. The subtree is visited left to right, so when
    node roots a valid BST the keys are appended in ascending order. Nothing
    is returned; the result is accumulated in the interval list.
    """
    if node is None:
        return

    if node.key <= a:
        # This key is at or below a; only the right subtree is searched.
        intervalSearch(node.right, a, b, interval)
        return

    # The key is above a: smaller keys in range can only be on the left.
    intervalSearch(node.left, a, b, interval)

    if not (node.key < b):
        # This key is at or above b; the right subtree is not visited.
        return

    interval.append(node.key)
    intervalSearch(node.right, a, b, interval)


In [9]:
# driver code: list the keys of a random BST that fall between a and b
import random

# random numbers between min_value and max_value to use as keys in ST
min_value = 0
max_value = 500
size = 10

randomListOfKeys = [random.randint(min_value, max_value) for _ in range(size)]

print("Keys to add to BST:", randomListOfKeys)

# The first key becomes the root; each value is the key's position in the list.
BSTroot = BSTNode(randomListOfKeys[0], 0)
for i, key in enumerate(randomListOfKeys[1:], start=1):
    BSTroot.put(key, i)

a = 100
b = 300
interval = []  # filled in place by intervalSearch
intervalSearch(BSTroot, a, b, interval)

# NOTE: the label prints square brackets, but intervalSearch leaves out keys
# equal to a or b (it tests key <= a and key < b).
print("Interval search [", a, "," , b, "] = ", interval)

Keys to add to BST: [243, 137, 282, 490, 283, 186, 68, 125, 289, 89]
Interval search [ 100 , 300 ] =  [125, 137, 186, 243, 282, 283, 289]
