|   | search | insert | delete (by ref) |
|---|---|---|---|
| Array | O(n) | O(1) | O(1)\* |
| Linked list | O(n) | O(1) | O(1) |
| Sorted array | O(log n) | O(n) | O(n) |
| Wanted | O(log n) | O(log n) | O(log n) |

\* To delete an element from an (unsorted) array, swap it with the very last element and then remove the last element. This works because the order of the elements in the array does not matter.

__Binary Search Trees__

For a node with key $x$ (starting from the root), every key in its left subtree is $\leq x$, and every key in its right subtree is $\geq x$.

In [None]:
class Node:
    """A binary-tree node: one key plus links to its parent and two children."""

    def __init__(self, key = 0, parent=None):
        # Only the parent link can be supplied up front; a fresh node has no
        # children until an insert routine attaches some.
        self.parent = parent
        self.left = self.right = None
        self.key = key

In [None]:
def bst_insert(root, node):
    """
    Insert node into the binary search tree rooted at root (in place).

    A key strictly smaller than the current key descends to the left; an equal
    or larger key descends to the right. The node is attached at the first
    empty child slot found and its parent link is set to the node above it.

    The descent is a loop rather than a recursion, so a degenerate tree whose
    height is close to n cannot exhaust Python's recursion limit.

    @param root root of a non-empty tree (None is not supported: there would be
                nothing to attach the node to)
    @param node the node to insert; its parent attribute is overwritten
    @return None

    complexity
    #iterations <= #levels = h
    Total: O(h),
    """
    current = root
    while True:
        if current.key > node.key:
            if current.left is None:
                current.left = node
                break
            current = current.left
        else:
            if current.right is None:
                current.right = node
                break
            current = current.right
    node.parent = current

In [None]:
def bst_search(root, val):
    """
    Find a node with key val in the binary search tree rooted at root.

    At each node the search goes left when val is smaller than the node's key
    and right otherwise, stopping at the first node whose key equals val or
    when it runs off the tree.

    The descent is a loop rather than a recursion, so a degenerate tree whose
    height is close to n cannot exhaust Python's recursion limit.

    @param root root of the tree, or None for an empty tree
    @param val the key to look for
    @return the first matching node on the search path, or None if there is none

    complexity
    #iterations <= #levels = h
    Total: O(h), h is often log(n), (not always)
    """
    current = root
    while current is not None and current.key != val:  # O(1) per level
        current = current.left if val < current.key else current.right
    return current

In [None]:
def bst_delete(root, val):
    """
    Delete one node with key val from the binary search tree rooted at root.

    1. find the node holding val (same descent as a search)        O(h)
    2. if that node has two children, find max in its left subtree
       and copy that key into the node; the max node is then the
       one to unlink (it has no right child)                       O(h)
    3. unlink the node to remove: it has at most one child, which
       takes its place under the node's parent                     O(1)
    total complexity O(h)

    The function follows and updates the parent attribute of the nodes, so the
    tree must have consistent parent links.

    @param root root of the tree, or None for an empty tree
    @param val key to delete; the tree is left untouched if it is not found
    @return the root of the tree after the deletion (None if it became empty)
    """
    # Step 1: locate the node that holds val.
    target = root
    while target is not None and target.key != val:
        target = target.left if val < target.key else target.right
    if target is None:
        return root

    # Step 2: with two children, move the in-order predecessor's key up and
    # remove the predecessor node instead; this keeps left <= key <= right.
    victim = target
    if target.left is not None and target.right is not None:
        victim = target.left
        while victim.right is not None:
            victim = victim.right
        target.key = victim.key

    # Step 3: splice out the victim, which now has at most one child.
    child = victim.left if victim.left is not None else victim.right
    parent = victim.parent
    if child is not None:
        child.parent = parent
    if parent is not None:
        if parent.left is victim:
            parent.left = child
        else:
            parent.right = child

    # Removing the root itself promotes its only child (or empties the tree).
    return child if victim is root else root

Height of a tree of size n:
   
worst case: n

best case (perfectly balanced): $\log_2(n+1)$, because a tree of height $h$ holds at most $n \le 1+2+4+\dots+2^{h-1} = 2^{h}-1$ nodes, hence $h \ge \log_2(n+1)$


_Good news_

- random data => $h = O(\log n)$ on average

- self-balancing trees (the implementation is complicated): RB-trees, AVL-trees, splay-trees; (not strictly binary: 2-3 trees, B-trees)

__Summary__

- Data structure for comparable items
- search/insert/delete: O(h), where h is height
- With good effort: h=O(log(n))
- Extra structure => extra methods (e.g., order statistics, interval trees, etc.)

In [2]:
import math
from pprint import pprint

class Node:
    """Node of a binary tree; stores a key and references to parent, left and right."""

    def __init__(self, key=0, parent = None):
        # Children always start empty; they are filled in by the insert helpers.
        self.left = self.right = None
        self.parent = parent
        self.key = key

def insert(root, node):
    """
    Insert node into the binary search tree rooted at root (in place).

    A key strictly smaller than the current key descends to the left; an equal
    or larger key descends to the right. The node is attached at the first
    empty child slot and its parent link is set to the node above it.

    Implemented as a loop so that a degenerate tree (height close to n, e.g.
    after inserting sorted keys) cannot exhaust Python's recursion limit.

    @param root root of a non-empty tree (None is not supported)
    @param node the node to insert; its parent attribute is overwritten
    @return None
    """
    current = root
    while True:
        if current.key > node.key:
            if current.left is None:
                current.left = node
                break
            current = current.left
        else:
            if current.right is None:
                current.right = node
                break
            current = current.right
    node.parent = current

def random_insert(root, node):
    """
    Insert node into the tree rooted at root WITHOUT keeping the BST order.

    The node descends to the left only when its key is a multiple of 3 and is
    smaller than the current key; in every other case it descends to the right,
    even if its key is smaller. The result is therefore generally not a binary
    search tree.
    NOTE(review): presumably meant for producing non-BST inputs for the checks
    in this file - confirm.

    Implemented as a loop: because most keys are sent to the right, the trees
    built here easily become long chains that would exhaust Python's recursion
    limit in a recursive version.

    @param root root of a non-empty tree (None is not supported)
    @param node the node to insert; its parent attribute is overwritten
    @return None
    """
    # The divisibility test depends only on the new key, so evaluate it once.
    may_go_left = node.key % 3 == 0
    current = root
    while True:
        if may_go_left and current.key > node.key:
            if current.left is None:
                current.left = node
                break
            current = current.left
        else:
            if current.right is None:
                current.right = node
                break
            current = current.right
    node.parent = current

#######################################################
# Module-level counter; no live code in this cell reads or updates it.
# NOTE(review): presumably a debugging aid for counting function calls -
# confirm before removing.
counter = 0
def tree_size(root):
    """
    Count the nodes of a binary tree.

    Uses an explicit stack instead of recursion so that very deep (degenerate)
    trees do not exhaust Python's recursion limit.

    @param root root of the tree, or None for an empty tree
    @return the number of nodes reachable from root (0 for an empty tree)
    """
    count = 0
    pending = [root]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        count += 1
        pending.append(node.left)
        pending.append(node.right)
    return count

def tree_max(root):
    """
    Find the largest key stored in a binary tree.

    Every node has to be visited because the tree is not assumed to be ordered,
    so the running time is O(n). The traversal uses an explicit stack instead of
    recursion so that very deep trees do not exhaust Python's recursion limit.

    @param root is an instance of the Node class representing a binary tree (not necessarily a Binary Search Tree),
    @return the largest number in the tree. For empty tree, we set the max to be -math.inf.
    """
    largest = -math.inf
    pending = [root]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        if node.key > largest:
            largest = node.key
        pending.append(node.left)
        pending.append(node.right)
    return largest

def _check_BST(root, minimum = -math.inf, maximum = math.inf):
    r"""
    Decide whether the tree rooted at root is a binary SEARCH tree whose keys
    all lie in the closed range [minimum, maximum].

    It is not enough to compare each node with its own children only: a key
    deep inside a left subtree must still be <= every ancestor it hangs to the
    left of, and a key inside a right subtree must be >= every ancestor it
    hangs to the right of. Each node is therefore checked against the tightest
    bounds inherited from all of its ancestors. Equal keys are accepted on
    either side (left <= key <= right).

    Example of a valid BST:
                      10
                     _|_
                    /   \
                   0     20
                 __|     |__
               /   |     |   \
             -5    5     15   25
            __|  __|     |__   |__
           /  | /  |     |  \  |  \
          -9 -2 1  6     12 17 22  29

    @param root root of the tree, or None for an empty tree
    @param minimum smallest key allowed anywhere in the tree
    @param maximum largest key allowed anywhere in the tree
    @return tuple (is_bst, minimum, maximum): the verdict followed by the two
            bounds exactly as they were passed in (the 3-tuple shape is kept so
            that callers indexing the result keep working)
    """
    # Explicit stack of (node, lower bound, upper bound); no recursion, so
    # degenerate trees of large height are handled too.
    pending = [(root, minimum, maximum)]
    while pending:
        node, low, high = pending.pop()
        if node is None:
            continue
        if not (low <= node.key <= high):
            return False, minimum, maximum
        # Going left caps the keys at this key; going right raises the floor.
        pending.append((node.left, low, node.key))
        pending.append((node.right, node.key, high))
    return True, minimum, maximum


def check_BST(root):
    """
    @param root root of a binary tree, or None for an empty tree
    @return True if root is a binary SEARCH tree (an empty tree counts as one),
            False otherwise
    """
    return _check_BST(root)[0]

def _min_diff(root, diff = math.inf, minimum = -math.inf, maximum = math.inf):
    r"""
    Compute the smallest absolute difference between the keys of two different
    nodes of the tree.

    The keys are gathered with an in-order walk and sorted; the smallest
    difference is then always found between two neighbours in sorted order.
    For a binary search tree the in-order walk already yields sorted keys, and
    the sort makes the result correct for an arbitrary binary tree as well.

    Example tree (smallest difference is 1, e.g. between 5 and 6):
                      10
                     _|_
                    /   \
                   0     20
                 __|     |__
               /   |     |   \
             -5    5     15   25
            __|  __|     |__   |__
           /  | /  |     |  \  |  \
          -9 -2 1  6     12 17 22  29

    @param root root of the tree, or None for an empty tree
    @param diff upper bound for the answer; returned unchanged when the tree
                has fewer than two nodes or no pair of keys is closer than it
    @param minimum not used by the computation; returned unchanged
    @param maximum not used by the computation; returned unchanged
    @return tuple (diff, minimum, maximum), where diff is the smallest
            difference found (0 if two nodes share a key); the 3-tuple shape is
            kept so that callers indexing the result keep working
    """
    # Iterative in-order traversal (no recursion, so deep trees are fine).
    keys = []
    pending = []
    node = root
    while pending or node is not None:
        while node is not None:
            pending.append(node)
            node = node.left
        node = pending.pop()
        keys.append(node.key)
        node = node.right

    keys.sort()
    for smaller, larger in zip(keys, keys[1:]):
        gap = larger - smaller
        if gap < diff:
            diff = gap
    return diff, minimum, maximum

def min_diff(root):
    """
    @param root root of a binary tree, or None for an empty tree
    @return the smallest absolute difference between the keys of two different
            nodes; math.inf if the tree has fewer than two nodes
    """
    return _min_diff(root)[0]

def _count_distinct(root, distinct = None, minimum = -math.inf, maximum = math.inf):
    r"""
    Count the distinct keys present in the tree.

    The keys are gathered with an in-order walk and sorted, so equal keys end
    up next to each other; the number of distinct keys is then one plus the
    number of neighbouring pairs that differ. For a binary search tree the
    in-order walk already yields sorted keys, and the sort makes the result
    correct for an arbitrary binary tree as well.

    The accumulator used to be a mutable default argument ([1]) shared by all
    calls, so every call after the first started from the previous total. A
    fresh accumulator is now created for each call that does not supply one.

    Example tree (15 distinct keys):
                      10
                     _|_
                    /   \
                   0     20
                 __|     |__
               /   |     |   \
             -5    5     15   25
            __|  __|     |__   |__
           /  | /  |     |  \  |  \
          -9 -2 1  6     12 17 22  29

    @param root root of the tree, or None for an empty tree
    @param distinct optional one-element list used as the counter. When given,
                    it is incremented in place once for every change of key in
                    sorted order (so a caller passing [1] gets the number of
                    distinct keys of a non-empty tree). When omitted, a new
                    counter is created: [1] for a non-empty tree, [0] otherwise.
    @param minimum returned unchanged for an empty tree; otherwise unused
    @param maximum returned unchanged for an empty tree; otherwise unused
    @return tuple (distinct, largest key, smallest key) for a non-empty tree,
            (distinct, minimum, maximum) for an empty one
    """
    # Iterative in-order traversal (no recursion, so deep trees are fine).
    keys = []
    pending = []
    node = root
    while pending or node is not None:
        while node is not None:
            pending.append(node)
            node = node.left
        node = pending.pop()
        keys.append(node.key)
        node = node.right
    keys.sort()

    if distinct is None:
        distinct = [1 if keys else 0]
    if not keys:
        return distinct, minimum, maximum

    # Each change of value between sorted neighbours reveals one more key.
    distinct[0] += sum(1 for before, after in zip(keys, keys[1:]) if before != after)
    return distinct, keys[-1], keys[0]

def count_distinct(root):
    """
    @param root root of a binary tree, or None for an empty tree
    @return the number of distinct keys stored in the tree (0 for an empty tree)
    """
    return _count_distinct(root)[0][0]

#################################################

if __name__ == "__main__":
    # Demo: grow a small search tree from the root key 3 ...
    T = Node(3)
    for demo_key in (1, 2, -2, 100):
        insert(T, Node(demo_key))

    # ... and report how many different keys it holds. The five keys inserted
    # above are all different, so this prints 5.
    print(count_distinct(T))

    # The other helpers can be tried on the same tree in the same way,
    # e.g. print(min_diff(T)) or print(check_BST(T)).

5
