# The BSTree data structure

## Agenda

- API
- Implementation
    - Search
    - Addition
    - Removal
    - Iteration / Traversal
- Runtime complexity

## API

In [2]:
class BSTree:
    """Binary search tree of distinct, mutually comparable values.

    This cell fixes the public API only: `__contains__`, `add`, `__delitem__`
    and `__iter__` are placeholders here and get their real bodies in the
    cells that follow.
    """

    class Node:
        """A single tree node: a value plus links to its two children."""

        def __init__(self, val, left=None, right=None):
            self.val = val
            self.left = left
            self.right = right

    def __init__(self):
        """Creates an empty tree."""
        self.size = 0     # number of stored values, reported by len()
        self.root = None  # topmost node, or None while the tree is empty

    def __contains__(self, val):
        """Returns `True` if val is in this tree and `False` otherwise."""
        pass

    def add(self, val):
        """Adds `val` to this tree while maintaining BSTree properties."""
        assert val not in self
        pass

    def __delitem__(self, val):
        """Removes `val` from this tree while maintaining BSTree properties."""
        assert val in self
        pass

    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order."""
        pass

    def __len__(self):
        """Returns the number of values recorded in `self.size`."""
        return self.size

    def height(self):
        """Returns the height of the root of the tree.

        An empty tree has height 0; a tree consisting of a single node has
        height 1.
        """
        def height_rec(t):
            if not t:
                return 0
            return 1 + max(height_rec(t.left), height_rec(t.right))
        return height_rec(self.root)

    def pprint(self, width=64):
        """Attempts to pretty-print this tree's contents.

        The tree is printed level by level. Level `k` is split into `2**k`
        cells of `width // 2**k` characters, each showing a value or `-` for
        a missing node. Once a cell is narrower than the value it has to hold
        (deep trees), the columns no longer line up.
        """
        # Local import keeps this cell runnable on its own.
        from collections import deque

        height = self.height()
        # Breadth-first walk. A deque pops from the front in O(1), whereas
        # list.pop(0) shifts every remaining entry on each pop.
        pending = deque([(self.root, 0)])
        prev_level = 0
        pieces = []  # joined once at the end instead of repeated `+=`
        while pending:
            n, level = pending.popleft()
            if prev_level != level:
                prev_level = level
                pieces.append('\n')
            cell_width = width // 2**level
            if not n:
                # Keep emitting placeholders below a missing node so that the
                # deeper rows stay aligned with their parents.
                if level < height - 1:
                    pending.extend([(None, level + 1), (None, level + 1)])
                pieces.append('{val:^{width}}'.format(val='-', width=cell_width))
            else:
                if n.left or level < height - 1:
                    pending.append((n.left, level + 1))
                if n.right or level < height - 1:
                    pending.append((n.right, level + 1))
                pieces.append('{val:^{width}}'.format(val=n.val, width=cell_width))
        print(''.join(pieces))

In [3]:
# Build a three-node tree by hand, since add() is still only a placeholder:
# 5 at the root, 2 as its left child and 10 as its right child.
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
# Nodes were wired up directly, so the size has to be set manually as well.
t.size = 3

In [30]:
# Root level plus one level of children gives a height of 2.
t.height()

2

In [4]:
# Print the tree one level per line, each value centred in its cell.
t.pprint()

                               5                                
               2                               10               


## Implementation

### Search

In [6]:
class BSTree(BSTree):
    def __contains__(self, val):
        """Returns `True` if `val` is stored in this tree and `False` otherwise.

        Follows a single path down from the root: at each node the search
        stops on a match, continues to the left when `val` is smaller than
        the node's value and to the right otherwise. Running off the tree
        means the value is absent.
        """
        current = self.root
        while current is not None:
            if current.val == val:
                return True
            current = current.left if val < current.val else current.right
        return False

In [32]:
# Hand-built tree for the search test: 5 at the root, 2 on the left and
# 10 on the right.
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
t.size = 3

In [33]:
# 3 is none of 5, 2, 10: the search goes left from 5, right from 2, and
# runs off the tree there.
3 in t

False

### Addition

In [5]:
class BSTree(BSTree):
    def add(self, val):
        """Adds `val` to this tree while maintaining BSTree properties.

        The new value always ends up in a fresh leaf: starting at the root,
        descend to the left when `val` is smaller than the current node's
        value and to the right otherwise, and attach the new node at the
        first empty child slot on that path.

        `val` must not already be in the tree (checked with an assertion).
        """
        assert val not in self

        leaf = BSTree.Node(val)
        if self.root is None:
            # Empty tree: the new node becomes the root.
            self.root = leaf
        else:
            parent = self.root
            while True:
                if val < parent.val:
                    if parent.left is None:
                        parent.left = leaf
                        break
                    parent = parent.left
                else:
                    if parent.right is None:
                        parent.right = leaf
                        break
                    parent = parent.right
        self.size += 1

In [39]:
import random
# Insert the values 0..9 in a shuffled order; the shape of the resulting
# tree depends on that order.
t = BSTree()
vals = list(range(10))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()

                               9                                
               1                               -                
       0               2               -               -        
   -       -       -       3       -       -       -       -    
 -   -   -   -   -   -   -   8   -   -   -   -   -   -   -   -  
- - - - - - - - - - - - - - 7 - - - - - - - - - - - - - - - - - 
----------------------------4-----------------------------------
---------------------------------------------------------5----------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------6--------------------------------------------------------------------------------------------------------------------------------------------


### Removal
There are three different cases. The node to delete:
- is a leaf node (has no children) -> easy: just remove it
- has only one child (left or right) -> fairly easy: replace the node with its child, i.e. move the child one level up
- has two children -> hmm, that's difficult

Let's handle the easy cases first:

In [40]:
class BSTree(BSTree):
    def __delitem__(self, val):
        """Removes `val` from this tree -- first version, easy cases only.

        Handles a node that is a leaf or has exactly one child. A node with
        two children is deliberately not handled yet: for it the helper
        returns `None`, which unlinks the node together with everything
        below it.

        `val` must be in the tree (checked with an assertion).
        """
        def del_rec(node):
            # Still searching: recurse into the side that can hold `val` and
            # re-attach whatever that subtree turns into.
            if val < node.val:
                node.left = del_rec(node.left)
                return node
            if val > node.val:
                node.right = del_rec(node.right)
                return node

            # `node` holds the value to remove.
            if node.left is None:
                # Leaf (right is None too) or right child only.
                return node.right
            if node.right is None:
                # Left child only.
                return node.left
            # Two children: not handled in this version.
            return None

        assert val in self
        self.root = del_rec(self.root)
        self.size -= 1

In [41]:
# Sample tree for the deletion experiments: 2 and 17 are leaves, 5 has only
# a left child, 15 has only a right child, and the root 10 has two children.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [42]:
# Case 1: 2 is a leaf, so it is simply unlinked from its parent 5.
del t[2]
t.pprint()

                               10                               
               5                               15               
       -               -               -               17       


In [44]:
# Rebuild the five-node sample tree so the next deletion starts from a
# fresh copy.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [45]:
# Case 2a: 5 has only a left child, so that child (2) takes its place.
del t[5]
t.pprint()

                               10                               
               2                               15               
       -               -               -               17       


In [46]:
# Rebuild the five-node sample tree so the next deletion starts from a
# fresh copy.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [47]:
# Case 2b: 15 has only a right child, so that child (17) takes its place.
del t[15]
t.pprint()

                               10                               
               5                               17               
       2               -               -               -        


In [48]:
# Rebuild the five-node sample tree so the next deletion starts from a
# fresh copy.
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               -               -               17       


In [49]:
# Case 3: the root 10 has two children. The easy-cases-only version returns
# None for such a node, so the root is replaced by None and the whole tree
# disappears.
del t[10]
t.pprint()

                               -                                


Well, ... deleting a node with two children just wiped out the whole tree, so it seems we have to handle the complex case now.

Proposals?

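There are different strategies. The one implemented below overwrites the node's value with the largest value in its left subtree (its in-order predecessor) and then unlinks that predecessor node instead: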

In [13]:
class BSTree(BSTree):
    def __delitem__(self, val):
        """Removes `val` from this tree while maintaining BSTree properties.

        A node with at most one child is replaced by that child (or by
        nothing, for a leaf). A node with two children keeps its place in
        the tree but takes over the largest value of its left subtree; the
        node that used to hold that value is unlinked instead.

        `val` must be in the tree (checked with an assertion).
        """
        assert val in self

        def delitem_rec(node):
            # Still searching: recurse into the side that can hold `val` and
            # re-attach whatever that subtree turns into.
            if val < node.val:
                node.left = delitem_rec(node.left)
                return node
            if val > node.val:
                node.right = delitem_rec(node.right)
                return node

            # `node` holds the value to remove.
            if node.left is None:
                # Leaf (right is None too) or right child only.
                return node.right
            if node.right is None:
                # Left child only.
                return node.left

            # Two children: walk to the rightmost node of the left subtree,
            # remembering its parent so it can be unlinked afterwards.
            parent, largest = node, node.left
            while largest.right is not None:
                parent, largest = largest, largest.right

            # The rightmost node has no right child, so its left subtree (if
            # any) can take over its slot.
            if parent is node:
                node.left = largest.left
            else:
                parent.right = largest.left

            node.val = largest.val
            return node

        self.root = delitem_rec(self.root)
        self.size -= 1

In [15]:
# A larger tree in which 10, 5, 2, 7 and 15 each have two children.
t = BSTree()
for x in [10, 5, 2, 3, 7, 9,  8, 1, 15, 12, 6, 18]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       3       6       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [16]:
# 15 has two children (12 and 18). The largest value in its left subtree is
# 12 itself, so 12 moves up into 15's position.
del t[15]
t.pprint()

                               10                               
               5                               12               
       2               7               -               18       
   1       3       6       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [17]:
# Ten-node tree: 5 has two children (2 and 7), and 2 has a left child (1)
# but no right child.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [18]:
# 5 has two children. Its left child 2 has no right child, so 2 is the
# largest value on that side: 2 replaces 5 and 1 moves up into 2's old slot.
del t[5]
t.pprint()

                               10                               
               2                               15               
       1               7               12              18       
   -       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [19]:
# Rebuild the ten-node tree so the next deletion starts from a fresh copy.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()

                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  


In [20]:
# Deleting the root: the largest value in its left subtree is 9 (reached via
# 5 -> 7 -> 9). 9 becomes the new root value and 9's left child 8 takes over
# 9's old slot under 7.
del t[10]
t.pprint()

                               9                                
               5                               15               
       2               7               12              18       
   1       -       -       8       -       -       -       -    


### Iteration / Traversal

In [29]:
class BSTree(BSTree):
    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order.

        This is an in-order walk: everything in a node's left subtree is
        yielded before the node's own value, and everything in its right
        subtree after it.
        """
        waiting = []  # ancestors whose own value has not been yielded yet
        node = self.root
        while waiting or node is not None:
            # Slide down to the leftmost node of the current subtree.
            while node is not None:
                waiting.append(node)
                node = node.left
            node = waiting.pop()
            yield node.val
            node = node.right

In [30]:
import random
# Insert 0..19 in a shuffled order, then iterate: whatever shape the tree
# ends up with, the in-order iterator yields the values in ascending order.
t = BSTree()
vals = list(range(20))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()
for x in t:
    print(x)

                               10                               
               9                               13               
       0               -               12              17       
   -       7       -       -       11      -       16      18   
 -   -   6   8   -   -   -   -   -   -   -   -   14  -   -   19 
- - - - 5 - - - - - - - - - - - - - - - - - - - - 15- - - - - - 
--------1-------------------------------------------------------
-----------------3--------------------------------------------------------------------------------------------------------------
----------------------------------24----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


### Pre-order Iteration (node first, then left subtree, then right subtree)

In [31]:
class BSTree(BSTree):
    def __iter__(self):
        """Returns an iterator over all the values in the tree, in pre-order.

        Each node's value is yielded before the values of its left subtree,
        which in turn come before the values of its right subtree. Unlike an
        in-order walk, this does not produce the values in ascending order.
        """
        todo = [self.root]
        while todo:
            node = todo.pop()
            if node is not None:
                yield node.val
                # Push right before left so the left subtree is popped, and
                # therefore visited, first.
                todo.append(node.right)
                todo.append(node.left)

In [35]:
import random
# Insert 0..9 in a shuffled order to get a tree to traverse.
t = BSTree()
vals = list(range(10))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()

                               3                                
               2                               7                
       0               -               4               9        
   -       1       -       -       -       5       8       -    
 -   -   -   -   -   -   -   -   -   -   -   6   -   -   -   -  


In [36]:
# With the iterator defined just above, each value is printed before the
# values in its left and right subtrees, so the root comes first.
for x in t:
    print(x)

3
2
0
1
7
4
5
6
9
8


## Runtime Complexity

The runtime complexities of the search, add, and delete methods of the binary search tree ultimately depend on the depth of recursion in their implementations. The depth of recursion in turn depends on the height of the binary search tree.

Given $N$ nodes, the height of a binary search tree is, in the worst case = ?

This gives us the following worst-case runtime complexities:

- Search = O(?)
- Add = O(?)
- Delete = O(?)

How can we improve this runtime complexity? What should be our target runtime complexity?