# The BSTree data structure

## Agenda

- API
- Implementation
    - Search
    - Insertion
    - Deletion
    - Iteration / Traversal
- Runtime complexity

## API

In [1]:
class BSTree:
    """A binary search tree of unique, mutually comparable values.

    This version only lays out the API: `__contains__`, `insert`,
    `__delitem__` and `__iter__` are placeholders whose bodies do nothing.
    `__len__`, `height` and `pprint` are fully implemented.
    """

    class Node:
        """A tree node: one value plus optional left and right children."""

        def __init__(self, val, left=None, right=None):
            self.val = val
            self.left = left
            self.right = right

    def __init__(self):
        self.size = 0     # number of values stored; reported by __len__
        self.root = None  # root Node, or None while the tree is empty

    def __contains__(self, val):
        """Returns `True` if val is in this tree and `False` otherwise."""
        pass

    def insert(self, val):
        """Inserts `val` into this tree while maintaining BSTree properties."""
        assert val not in self
        pass

    def __delitem__(self, val):
        """Deletes `val` from this tree while maintaining BSTree properties."""
        assert val in self
        pass

    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order."""
        pass

    def __len__(self):
        """Returns the number of values recorded in `self.size`."""
        return self.size

    def height(self):
        """Returns the number of levels in the tree (0 when the tree is empty)."""
        def height_rec(t):
            if not t:
                return 0
            return 1 + max(height_rec(t.left), height_rec(t.right))
        return height_rec(self.root)

    def pprint(self, width=64):
        """Attempts to pretty-print this tree's contents.

        Prints one row of text per level. On a given level every cell is
        `width // 2**level` characters wide with its content centred; a
        missing node is shown as '-'. Missing subtrees are padded with '-'
        placeholders down to the last level so that deeper values stay
        aligned under their parents.
        """
        height = self.height()
        rows = []
        level = 0
        current = [self.root]
        # Walk the tree one level at a time, collecting the nodes (or None
        # placeholders) of the next level while formatting the current one.
        while current:
            cell_width = width // 2**level
            cells = []
            below = []
            for n in current:
                if n is None:
                    # Keep padding below a gap until the deepest level.
                    if level < height - 1:
                        below.extend((None, None))
                    cells.append('{val:^{width}}'.format(val='-', width=cell_width))
                else:
                    if n.left or level < height - 1:
                        below.append(n.left)
                    if n.right or level < height - 1:
                        below.append(n.right)
                    cells.append('{val:^{width}}'.format(val=n.val, width=cell_width))
            rows.append(''.join(cells))
            current = below
            level += 1
        print('\n'.join(rows))

In [2]:
# Build a three-node tree by hand: insert() is still a stub at this point,
# so the nodes are linked directly and the size is set manually.
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
t.size = 3

In [3]:
# Height counts levels: the root plus one level of children gives 2.
t.height()

2

In [4]:
# Each level of the tree is printed on its own row.
t.pprint()

                               5                                
               2                               10               


## Implementation

### Search

In [5]:
class BSTree(BSTree):
    def __contains__(self, val):
        """Returns `True` if val is in this tree and `False` otherwise.

        Walks down from the root, going left when `val` is smaller than the
        current node's value and right otherwise, until the value is found
        or an empty link is reached.
        """
        def search(node):  # closes over `val`, so only the node is passed down
            if node is None:
                return False
            if val == node.val:
                return True
            child = node.left if val < node.val else node.right
            return search(child)

        return search(self.root)

In [6]:
# Rebuild the hand-linked tree as an instance of the BSTree class redefined
# above, so that it picks up the working __contains__ (insert() is still a stub).
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
t.size = 3

In [7]:
# The `in` operator calls __contains__; 10 is the root's right child.
10 in t

True

### Insertion

In [8]:
class BSTree(BSTree):
    def insert(self, val):
        """Inserts `val` into this tree while maintaining BSTree properties.

        The value is added as a new leaf at the position found by descending
        left for smaller values and right for larger ones.

        Raises:
            AssertionError: if `val` is already in the tree (values are unique).
            ValueError: if `val` is neither less than nor greater than a value
                met on the way down. This covers a duplicate when assertions
                are disabled, and unordered values such as NaN. The tree is
                left unchanged.
        """
        def insert_rec(n):
            if n is None:
                return BSTree.Node(val)
            elif val < n.val:
                n.left = insert_rec(n.left)
                return n
            elif val > n.val:
                n.right = insert_rec(n.right)
                return n
            else:
                # Falling through here would return None, and the caller would
                # overwrite its link with None and drop the whole subtree.
                raise ValueError(
                    'cannot insert {!r}: not orderable against existing value {!r}'
                    .format(val, n.val))

        assert val not in self # guarantees unique values
        self.root = insert_rec(self.root)
        self.size += 1

In [14]:
# Insert 0..4 in random order. The shape of the tree depends on the shuffle,
# so the output below is just one possible result.
import random
t = BSTree()
vals = list(range(5))
random.shuffle(vals)
for x in vals:
    t.insert(x)
t.pprint()

                               3                                
               0                               4                
       -               1               -               -        
   -       -       -       2       -       -       -       -    


### Deletion

In [15]:
class BSTree(BSTree):
    def __delitem__(self, val):
        """Deletes `val` from this tree (first version: at most one child).

        Handles removal of a leaf and of a node with a single child. A node
        with two children is NOT handled yet: the helper returns None for it,
        so the link to that node, and everything below it, is dropped.
        """
        def remove(node):
            # Descend towards the node holding `val`, relinking on the way back.
            if val < node.val:
                node.left = remove(node.left)
                return node
            if val > node.val:
                node.right = remove(node.right)
                return node

            # `node` holds the value to delete.
            if node.left is None:
                # Leaf (right is None too) or right child only.
                return node.right
            if node.right is None:
                # Left child only.
                return node.left
            # Two children: not implemented in this version.
            return None

        assert val in self
        self.root = remove(self.root)
        self.size -= 1

In [16]:
# Test tree: 2 and 17 are leaves, 5 has only a left child,
# 15 has only a right child, and 10 has two children.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [17]:
# Case 1: delete a leaf (2); it is simply unlinked from its parent.
del t[2]
t.pprint()

                               10                               
               5                               15               
       -               -               -               17       


In [18]:
# Rebuild the same test tree, since the previous deletion modified it.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [19]:
# Case 2: delete 5, which has only a left child; 2 takes its place.
del t[5]
t.pprint()

                               10                               
               2                               15               
       -               -               -               17       


In [21]:
# Rebuild the same test tree for the next deletion case.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [22]:
# Case 3: delete 15, which has only a right child; 17 takes its place.
del t[15]
t.pprint()

                               10                               
               5                               17               
       2               -               -               -        


In [23]:
# Rebuild the same test tree for the two-children case.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [24]:
# Case 4: delete 10, which has two children. The __delitem__ defined so far
# does not handle this case: its helper returns None, so the root is replaced
# by None and the whole tree is lost.
del t[10]
t.pprint()

                               -                                


In [25]:
class BSTree(BSTree):
    def __delitem__(self, val):
        """Deletes `val` from this tree while maintaining BSTree properties.

        A leaf is unlinked, and a node with one child is replaced by that
        child. A node with two children keeps its place but takes over the
        largest value of its left subtree; the node that held that value is
        then unlinked.
        """
        def remove(node):
            # Descend towards the node holding `val`, relinking on the way back.
            if val < node.val:
                node.left = remove(node.left)
                return node
            if val > node.val:
                node.right = remove(node.right)
                return node

            # `node` holds the value to delete.
            if node.left is None:
                # Leaf (right is None too) or right child only.
                return node.right
            if node.right is None:
                # Left child only.
                return node.left

            # Two children: find the maximum of the left subtree by following
            # right links, remembering the parent of the node we end up on.
            parent, largest = node, node.left
            while largest.right is not None:
                parent, largest = largest, largest.right

            # Unlink `largest`; its left subtree (if any) takes its place.
            if parent is node:
                node.left = largest.left
            else:
                parent.right = largest.left
            node.val = largest.val
            return node

        assert val in self
        self.root = remove(self.root)
        self.size -= 1

In [26]:
# A larger test tree in which 10, 5 and 15 each have two children.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [27]:
# Delete 15: its left subtree is the single node 12, so 12 replaces it.
del t[15]
t.pprint()

                               10                               
               5                               12               
       2               7               -               18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [28]:
# Rebuild the larger test tree, since the previous deletion modified it.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [29]:
# Delete 5: the largest value in its left subtree is 2 (which has no right
# child), so 2 replaces 5 and 2's left child 1 moves up.
del t[5]
t.pprint()

                               10                               
               2                               15               
       1               7               12              18       
   -       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [30]:
# Rebuild the larger test tree for the final deletion case.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.insert(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [31]:
# Delete the root 10: the largest value in its left subtree is 9, reached by
# following right links from 5. 9 replaces 10 and 9's left child 8 moves up.
del t[10]
t.pprint()

                               9                                
               5                               15               
       2               7               12              18       
   1       -       -       8       -       -       -       -    


### Iteration / Traversal

In [32]:
class BSTree(BSTree):
    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order.

        Performs an in-order traversal: everything in a node's left subtree
        is produced before the node's own value, and everything in its right
        subtree after it.
        """
        def inorder(node):
            pending = []  # nodes whose left subtree is currently being visited
            while pending or node is not None:
                # Go as far left as possible, remembering the path.
                while node is not None:
                    pending.append(node)
                    node = node.left
                node = pending.pop()
                yield node.val
                node = node.right
        return inorder(self.root)

In [33]:
# Insert 0..19 in random order; the in-order traversal yields them sorted
# regardless of the insertion order.
import random
t = BSTree()
vals = list(range(20))
random.shuffle(vals)
for x in vals:
    t.insert(x)
for x in t:
    print(x)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


### Iteration by Level

In [34]:
class BSTree(BSTree):
    def __iter__(self):
        """Yields the values of the tree level by level (breadth-first).

        The root comes first, then its children from left to right, then
        their children, and so on. An empty tree yields nothing.

        Note: this redefinition replaces the in-order `__iter__`, so values
        are no longer produced in ascending order.
        """
        from collections import deque  # O(1) pops from the front of the queue

        if self.root is None:
            # Nothing to visit; seeding the queue with None would fail on `.val`.
            return
        queue = deque([self.root])
        while queue:
            n = queue.popleft()
            yield n.val
            # Children join the back of the queue, so a whole level is
            # finished before the next one starts.
            if n.left:
                queue.append(n.left)
            if n.right:
                queue.append(n.right)

In [36]:
# Build a random tree of 0..9 and print it, so that the level-order output
# of the following cell can be checked against the rows shown here.
import random
t = BSTree()
vals = list(range(10))
random.shuffle(vals)
for x in vals:
    t.insert(x)
t.pprint()

                               6                                
               0                               8                
       -               3               7               9        
   -       -       1       4       -       -       -       -    
 -   -   -   -   -   2   -   5   -   -   -   -   -   -   -   -  


In [37]:
# Values now come out level by level, left to right, matching the rows of
# the tree printed above.
for x in t:
    print(x)

6
0
8
3
7
9
1
4
2
5


## Runtime Complexity

The runtime complexities of the search, insert, and delete methods of the binary search tree ultimately depend on the depth of their recursion, which in turn depends on the height of the tree.

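Given $N$ nodes, the height of a binary search tree is $O(N)$ in the worst case (e.g., when the values are inserted in sorted order, every node has a single child and the tree degenerates into a linked list).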

This gives us the following worst-case runtime complexities:

- Search = $O(N)$
- Insert = $O(N)$
- Delete = $O(N)$

How can we improve this runtime complexity? What should be our target runtime complexity?