In [4]:
class BstNode:
    def __init__(self, data = None, left = None, right = None):
        self.data, self.left, self.right = data, left, right 

In [5]:
def search_bst(tree: BstNode, key: int) -> BstNode:
    return (tree
           if not tree or tree.data == key else search_bst(tree.left, key)
           if key < tree.data else search_bst(tree.right, key))

In [6]:
def tree_traversal_inorder(root: BinaryTreeNode) -> None:
    if root:
        tree_traversal_inorder(root.left) 
        print('Inorder: %d' % root.data)
        tree_traversal_inorder(root.right)

## 14.1 Test if a binary tree satisfies the BST property 

Write a program that takes as input a binary tree and checks if the tree satisfies the BST property. 

**Hint:** Is it correct to check for each node that is key is greater than or equal to the key at its left child and less than or equal to the key ar its right child?

Not correct. We need to check each node is greater than or equal to the key at not only its left child and all left descendants recursively. The same for right descendants. 

**Sol：** A direct approach, based on the definition of a BST, is to begin with the root, and compute the maximum key stored in the root's left subtree, and the minimum key in the root's right subtree. We check that the key at the root is greater than or equal to the maximum from the left subtree and less than or equal to the minimum from the right subtree. If both these checks pass, we recursively check the root's left and right subtrees. If either check fails, we return false. 

Computing the minimum key in a binary tree is starghtforward: we take the minimum of the key stored at its root, the minimum key of the left subtree, and the minimum key of the right subtree. The maximum key is computed similarly. Note that the minimum can be in either subtree, since a general binary tree may not satisfy the BST property. 

The problem with this approach is that it will repeatedly traverse subtrees. In the worst-case, when the tree is a BST and each node's left child is empty, the complexity is O(n^2), where n is the number of nodes. The complexity can be improved to O(n) by caching the largest and the smallest keys at each node; this requires O(n) additional storage for the cache. 

We now present two approaches which have O(n) time complexity. 

The first appraoch is to check constraints on the values for each subtree. The initial constraint comes from the root. Every node in its left (right) subtree must have a key less than or equal (greater than or equal) to the key at the root. This idea generalizes: if all nodes in a tree must have keys in the range [l,u], and the key at the root is w (which itself must be between [l,u], otherwise the requirement is violated at the root itself), then all keys in the left subtree must be in the range [l,w], and all keys stored in the right subtree must be in the range [w,u]. 

In [13]:
def is_binary_tree_bst(tree: BinaryTreeNode) -> bool:
    def are_keys_in_range(tree,
                         low_range = float('-inf'),
                         high_range = float('inf')):
        print('low range', 'high range')
        print(low_range, high_range)
        if not tree:
            return True
        elif not low_range <= tree.data <= high_range:
            return False
        return (are_keys_in_range(tree.left, low_range, tree.data)
               and are_keys_in_range(tree.right, tree.data, high_range))
    return are_keys_in_range(tree)

The time complexity is O(n), since we need to go through every point in the binary tree. 
And the additional space complexity is O(h), where h is the height of the tree since we need to store the results of every layer. 

In [8]:
node1 = BinaryTreeNode(314)
node2 = BinaryTreeNode(6)
node3 = BinaryTreeNode(6)
node4 = BinaryTreeNode(271)
node5 = BinaryTreeNode(561)
node6 = BinaryTreeNode(2)
node7 = BinaryTreeNode(271)
node8 = BinaryTreeNode(28)
node9 = BinaryTreeNode(0)
node10 = BinaryTreeNode(3)
node11 = BinaryTreeNode(1)
node12 = BinaryTreeNode(28)
node13 = BinaryTreeNode(17)
node14 = BinaryTreeNode(401)
node15 = BinaryTreeNode(257)
node16 = BinaryTreeNode(641)

In [9]:
node1.left = node2
node1.right = node3
node2.left = node4
node2.right = node5
node3.left = node6
node3.right = node7
node4.left = node8
node4.right = node9
node5.right = node10
node5.left = node13
node6.left = node14
node6.right = node11
node7.left = node16
node7.right = node12

In [14]:
is_binary_tree_bst(node1)

low range high range
-inf inf
low range high range
-inf 314
low range high range
-inf 6


False

In [62]:
node1 = BinaryTreeNode(19)
node2 = BinaryTreeNode(7)
node3 = BinaryTreeNode(43)
node4 = BinaryTreeNode(3)
node5 = BinaryTreeNode(11)
node6 = BinaryTreeNode(2)
node7 = BinaryTreeNode(5)
node8 = BinaryTreeNode(17)
node9 = BinaryTreeNode(13)
node10 = BinaryTreeNode(23)
node11 = BinaryTreeNode(47)
node12 = BinaryTreeNode(37)
node13 = BinaryTreeNode(29)
node14 = BinaryTreeNode(41)
node15 = BinaryTreeNode(31)
node16 = BinaryTreeNode(53)

In [63]:
node1.left = node2
node1.right = node3
node2.left = node4
node2.right = node5
node4.left = node6
node4.right = node7
node5.right = node8
node8.left = node9
node3.left = node10
node3.right = node11
node10.right = node12
node12.left = node13
node12.right = node14
node13.right = node15
node11.right = node16

In [26]:
is_binary_tree_bst(node1)

low range high range
-inf inf
low range high range
-inf 19
low range high range
-inf 7
low range high range
-inf 3
low range high range
-inf 2
low range high range
2 3
low range high range
3 7
low range high range
3 5
low range high range
5 7
low range high range
7 19
low range high range
7 11
low range high range
11 19
low range high range
11 17
low range high range
11 13
low range high range
13 17
low range high range
17 19
low range high range
19 inf
low range high range
19 43
low range high range
19 23
low range high range
23 43
low range high range
23 37
low range high range
23 29
low range high range
29 37
low range high range
29 31
low range high range
31 37
low range high range
37 43
low range high range
37 41
low range high range
41 43
low range high range
43 inf
low range high range
43 47
low range high range
47 inf
low range high range
47 53
low range high range
53 inf


True

Alternatively we can use the fact that an inorder traversal visits keys in sorted order. Furthermore, if an inorder traversal of a binary tree visits keys in sorted order, then that binary tree must be a BST. (This follows directly from the definition of a BST and the definition of an inorder walk.) Thus we can check the BST property by performing an inorder traversal, recording the key stored at the last visited node. Each time a new node is visited, its key s compared with the key of the previously visited node. If at any step in the walk, the key at the previoulsy visited node is greater than the node currently being visited, we have a violation of the BST property. 

All these approaches explore the left subtree first. Therefore, evern if the BST property does not hold at a node which is close to the root (e.g., the key stored at the right child is less than the key stored at the root), their time complexity is still O(n). 

We can search for violations of the BST property in a BFS manner, thereby reducing the time complexity when the property is violated at a node whose depth is small. 

Specifically, we can use a queue, where each queue entry contains a node, as well as an upper and a lower bound on the keys stored at the subtree rooted at that node. The queue is initialized to the a lower bound on the keys stored at the subtree rooted at that node. The queue is initialized to the root, with lower bound negative infinity and upper bound infinity. We iteratively check the constraint on each node. If it violates the constraint we stop --the BST property has been violated. Otherwise, we add its children along with the corresponding constraint. 

If the BST property is violated in a subtree consisting of nodes within a particular depth, the violation will be discovered without visiting any nodes at a greater depth. This is because each time we enqueue en entry, the lower and upper bounds on the node's key are the tightest possible. 

In [23]:
import collections

In [27]:
def is_binary_tree_bst_2(tree: BinaryTreeNode) -> bool:
    QueueEntry = collections.namedtuple('QueueEntry',
                                       ('node', 'lower', 'upper'))
    
    bfs_queue = collections.deque(
        [QueueEntry(tree, float('-inf'), float('inf'))])
    
    while bfs_queue:
        front = bfs_queue.popleft()
        if front.node:
            if not front.lower <= front.node.data <= front.upper:
                return False
            bfs_queue.extend(
                (QueueEntry(front.node.left, front.lower, front.node.data),
                 QueueEntry(front.node.right, front.node.data, front.upper)))
    return True

In [28]:
is_binary_tree_bst_2(node1)

True

The time complexity is O(n) and the additional space complexity is O(n).

## 14.2 Find the first key greater than a given value in a BST

Write a program that takes as input a BST and a value, and returns the first key that would appear in an inorder traversal which is greater than the input value. 

**Sol:** We can find the desired node in O(n) time, where n is the number of nodes in the BST, by doing an inorder walk. This approach does not use the BST properly. 

A better approach is to use the BST search idiom. We store the best candidate for the result and update candidate as we iteratively descend the tree, eliminating subtrees by comparing the keys stored at nodes with the input value. Specifically, if the current subtree's root holds a value less than or equal to the input value, we search the right subtree. If the current subtree's root stores a key that is greater than the input value, we search in the left subtree, updating the candidate to the current root. Correctness follows from the fact that whenever we first set the candidate, the desired result must be within the tree rooted at that node. 

In [29]:
def find_first_greater_than_k(tree:BstNode, k: int) -> BstNode:
    subtree, first_so_far = tree, None
    while subtree:
        if subtree.data > k:
            first_so_far = subtree
            subtree = subtree.left
        else:
            subtree = subtree.right
    return first_so_far

In [32]:
tree_traversal_inorder(find_first_greater_than_k(node1, 23))

Inorder: 29
Inorder: 31


The time complexity is O(h), where h is the height of the tree. The space complexity is O(1). 

## 14.3 Find the kth largest elements in a BST

A BST is a sorted data structure, which suggests that it should be possible to find the k largest keys easily. Write a program that takes as input a BST and an integer k, and returns the k largest elements in the BST in decreasing order. For example, if the input is the BST in Figure 14.1 and k = 3, your program should return <53,47,43>.

The brute-force approach is to do an inorder traversal, which enumerates keys in ascending order, and return the last k visited nodes. A queue is ideal for storing visited nodes, since it makes it easy to evict nodes visited more than k steps previously. A drawback of this approach is that it potentially processes many nodes that cannot possibly be in the result, e.g., if k is small and the left subtree is large. 

A better approach is to begin with the desired nodes, and work backwards. We do this by recursing first on the right subtree and then on the left subtree. This amounts to a reverse-inorder traversal. As soon as we visit k nodes, we can halt. The code below uses a dynamic array to store the desired keys. As soon as the array has k elements, we return. We store newer nodes at the end of the array, as per the problem specification. 

In [35]:
def find_k_largest_in_bst(tree:BstNode, k: int) -> list:
    def find_k_largest_in_bst_helper(tree):
        # Perform reverse inorder traversal.
        if tree and len(k_largest_elements) <k:
            find_k_largest_in_bst_helper(tree.right)
            if len(k_largest_elements) < k:
                k_largest_elements.append(tree.data)
                find_k_largest_in_bst_helper(tree.left)
    k_largest_elements = []
    find_k_largest_in_bst_helper(tree)
    return k_largest_elements

In [36]:
find_k_largest_in_bst(node1,5)

[53, 47, 43, 41, 37]

The time complexity is O(h+k), which can be much better than performing a conventional inorder walk, e.g., when the tree is balanced and k is small. The complexity bound comes from observation that the number of times the program descends in the tree can be at most h more than the number of times it ascends the tree, and each ascent happens after we visit a node in the result. After k nodes have been added to the result, the program stops.

## 14.4 Compute the LCA in a BST

Since a BST is a specialized binary tree, the notion of lowest common ancestor, as expressed in Problem 9.4, holds for BSTs too.

In general, computing the LCA of two nodes in a BST is no easier than computing the LCA in a binary tree, since the structurally a binary tree can be viewed as a BST where all the keys are equal. However, when the keys are distinct, it is possible to improve on the LAC algorithms for binary trees. 

Design an algorithm that takes as input a BST and two nodes, and returns the LCA of the two nodes. For example, for the BST in Figure 14.1 and nodes C and G, your algorithm should return B. Assume all keys are distinct. Nodes do not have references to their parents. 

The approach can be improved upon when operating on BSTs with distinct keys. Let s and b be the two nodes whose LCA we are to compute, and without loss of generality, assume that key at s is smaller. Consider the key stored at the root of the BST. There are four possibilities:
* If the root's key is the same as that stored at s or at b, we are done-- the root is the LCA.
* If the key at s is smaller than the key at the root, and the key at b is greater than the key at the root, the root is the LCA. 
* If the keys ar s and b are both smaller than that at the root, the LCA must lie in the left subtree of the root.
* If both keys are larger than that at the root, then the LCA must lie in the right subtree of the root. 

In [37]:
def find_lca(tree: BstNode, s:BstNode, b:BstNode) -> BstNode:
    while tree.data < s.data or tree.data >b.data:
        # Keep searching since the tree is outside of [s,b]
        while tree.data < s.data:
            tree = tree.right # LCA must be in tree's right child
        while tree.data > b.data:
            tree = tree.left # LCA must be in tree's left child 
    # Now, s.data <= tree.data && tree.data <= b.data
    return tree

In [38]:
tree_traversal_inorder(find_lca(node1, node12, node16))

Inorder: 23
Inorder: 29
Inorder: 31
Inorder: 37
Inorder: 41
Inorder: 43
Inorder: 47
Inorder: 53


In [39]:
tree_traversal_inorder(find_lca(node1, node6, node7))

Inorder: 2
Inorder: 3
Inorder: 5


In [40]:
tree_traversal_inorder(find_lca(node1, node2, node16))

Inorder: 2
Inorder: 3
Inorder: 5
Inorder: 7
Inorder: 11
Inorder: 13
Inorder: 17
Inorder: 19
Inorder: 23
Inorder: 29
Inorder: 31
Inorder: 37
Inorder: 41
Inorder: 43
Inorder: 47
Inorder: 53


Since we descend one level with each iteration, the time complexity is O(h), where h is the height of the tree. 

## 14.5 Recnostruct a BST from traversal data

As discussed in Problem 9.11 there are many different binary trees that yield the same sequence of visited nodes in an inodrder traversal. This is also true for preorder and postorder traversals. Given the sequence of nodes that an inorder traversal sequence visits and either of the other two traversal sequences, there exists a unique binary tree that yields those sequences. Here we study if it is possible to reconstruct the tree with less traversal information when the tree is known as BST. 

Suppose you are given the sequence in which keys are visited in an inorder traversal of a BST, and all keys are distinct. Can you reconstruct the BST from the sequence? If so, write a program to do so. Solve the same problem for preorder and postorder traversal sequences. 

**Sol:** First, with some experimentation, we see the sequence of keys generated by an inorder traversal is not enough to reconstruct the tree. For example, the key sequence <1,2,3> corresponds to five distinct BSTs.

However, the story for a preorder sequence is different. As an example, consider the preorder key sequence <43,23,37,29,31,41,47,53>. The root must hold 43, since it is the first visited node. The left subtree contains keys less than 43, i.e. 23, 37, 29, 31, 41, and the right subtree contains keys greater than 43, i.e., 47, 53. Furthermore, <23,37,29,31,41> ix exactly the preorder sequence of the left subtree and <47,53> is exactly the preorder sequence for the right subtree. We can recursively reason that 23 and 47 are the roots of the left and right subtree, and continue to build the entire tree, which is exactly the subtree rooted at Node I in Figure 14.1.

In [41]:
def rebuild_bst_from_preorder(
    preorder_sequence: list) -> BstNode:
    if not preorder_sequence:
        return None
    
    transition_point = next((i for i , a in enumerate(preorder_sequence)
                            if a > preorder_sequence[0]),
                           len(preorder_sequence))
    return BstNode(
        preorder_sequence[0],
        rebuild_bst_from_preorder(preorder_sequence[1: transition_point]),
        rebuild_bst_from_preorder(preorder_sequence[transition_point:]))

In [42]:
preorder_sequence = [43,23,37,29,31,41,47,53]

In [43]:
tree_traversal_inorder(rebuild_bst_from_preorder(preorder_sequence))

Inorder: 23
Inorder: 29
Inorder: 31
Inorder: 37
Inorder: 41
Inorder: 43
Inorder: 47
Inorder: 53


In [44]:
preorder_sequence = [19,7,3,2,5,11,17,13,43,23,37,29,31,41,47,53]

In [46]:
tree_traversal_inorder(rebuild_bst_from_preorder(preorder_sequence))

Inorder: 2
Inorder: 3
Inorder: 5
Inorder: 7
Inorder: 11
Inorder: 13
Inorder: 17
Inorder: 19
Inorder: 23
Inorder: 29
Inorder: 31
Inorder: 37
Inorder: 41
Inorder: 43
Inorder: 47
Inorder: 53


The worst-case input for this algorithm is the pre-order sequence corresponding to a left-skewed tree. The worst-case time complexity satisfies the recurrence W(n) = W(n-1) + O(n), which solves to O(n^2). The best-case input is a sequence corresponding to a right-skewed tree, and the corresponding time complexity is O(n). When the sequence corresponds to a balanced BST, the time complexity is given by B(n) = 2 B(n/2) + O(n), which solves to O(n log n). 

The implementation above potentially iterates over nodes multiple times, which is wasteful. A better approach is to reconstruct the left subtree in the same iteration as identifying the nodes which lie in it. The code shown below takes this approach. The intuition is that we do not want to iterate from first entry after the root to the last entry smaller than the root, not to go back and partially repeat this process for the root's left subtree. We can avoid repeated passes over nodes by inducing the range of keys we want to reconstruct the subtrees over. For example, looking at preorder key sequence <43,23,37,29,31,41,47,53>, instead of recursing on <23,37,29,31,41> (which would involve an iteration to get the last element in this sequence). We can derectly recur on <23,37,29,31,41,47,53>, with the constraint that we are building the subtree on nodes whose keys are less than 43. 

In [10]:
def rebuild_bst_from_preorder_2(preorder_sequence: list) -> BstNode:
    def rebuild_bst_from_preorder_on_value_range(lower_bound, upper_bound):
        if root_idx[0] == len(preorder_sequence):
            return None
        
        root = preorder_sequence[root_idx[0]]
        if not lower_bound <= root <= upper_bound:
            return None
        root_idx[0] += 1
        print(root_idx)
        # Note that rebuild_bst_from_preorder_on_value_range updates root_idx[0]
        # So the order of following two calls are critical
        left_subtree = rebuild_bst_from_preorder_on_value_range(
            lower_bound, root)
        right_subtree = rebuild_bst_from_preorder_on_value_range(
            root, upper_bound)
        return BstNode(root, left_subtree, right_subtree)
    
    root_idx =[0] # Track the current subtree
    return rebuild_bst_from_preorder_on_value_range(
        float('-inf'), float('inf'))


In [11]:
preorder_sequence = [19,7,3,2,5,11,17,13,43,23,37,29,31,41,47,53]

In [9]:
tree_traversal_inorder(rebuild_bst_from_preorder_2(preorder_sequence))

Inorder: 2
Inorder: 3
Inorder: 5
Inorder: 7
Inorder: 11
Inorder: 13
Inorder: 17
Inorder: 19
Inorder: 23
Inorder: 29
Inorder: 31
Inorder: 37
Inorder: 41
Inorder: 43
Inorder: 47
Inorder: 53


In [12]:
rebuild_bst_from_preorder_2(preorder_sequence)

[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]
[10]
[11]
[12]
[13]
[14]
[15]
[16]


<__main__.BstNode at 0x110588240>

The worst-case time complexity is O(n), since it performs a constant amount of work per node. Note the similarity to Solution 24.19 on Page 407. 

A postorder traversal sequence also uniquely specifies the BST, and the algorithm for reconstructing the BST is very similar to that for the preorder case. 

## 14.6 Find the closest entries in three sorted arrays

Design an algorithm that takes three sorted arrays of integers and returns the lenght of a shortest inverval that includes at least one entry from each array. A single value may be present in more than one array, but you can assume there are no duplicate values within a single array. For example, if the three arrays are <5,10,15>, <3,6,9,12,15>, and <8,1,24>, then the answer is 1; it corresponds to the interval [15,16], which includes 15, 15 and 16 from the first, second, and third arrays. 

By iteratively examing and removing the smallest element from the triple, we compute the minimum interval starting at that element. Since the minimum interval containing eleemnts from each array must begin with the element of some array, we are guaranteed to encounter the minimum element. Since we need to repeatedly insert, delete, find the minimum, and find the maximum amongst a collection of k elements, a BST is the natural choice. 

In [14]:
def find_closest_elements_in_sorted_arrays(
    sorted_arrays: list) -> int:
    # Stores array iterators in each entry.
    iters = bintrees.RBTree()
    for idx, sorted_aray in enumerate(sorted_arrays):
        it = iter(sorted_array)
        first_min = next(it, None)
        if first_min is not None:
            iters.insert((first_min, idx), it)
        
    min_distance_so_far = float('inf')
    while True:
        min_value, min_idx = iters.min_key()
        max_value = iters.max_key()[0]
        min_distance_so_far = min(max_value - min_value, min_distance_so_far)
        it = iters.pop_min()[1]
        next_min = next(it, None)
        # Return if some array has no remaining elements.
        if next_min is None:
            return typing.cast(int, min_distance_so_far)
        iter.insert((next_min, min_idx), it)

The time complexity is O(n log k), where n is the total number of elements in the k arrays. For the special case k = 3 specified in the problem statement, the time complexity is O(n log 3) = O(n). 

## 14.7 Enumerate extended integers 

Numbers of the form a+b$\sqrt{q}$, where a and b are nonnegative integers, and q is an integer which is not the square of another integer, have special properties, e.g., they are closed under addition and multiplication. 

Design an algorithm for efficiently computing the k smallest numbers of the form a+ b $\sqrt{2}$ for nonnegative integers a and b. 

**Sol:** We know the smallest number is 0+0 $\sqrt{2}$. The candidates from next smallest number are 1+0$\sqrt{2}$ and 0 +1$\sqrt{2}$. From this, we can deduce the following algorithm. We want to maitain a collection of real numbers, initialized to 0+0 $\sqrt{2}$. We perform k extractions of the smallest element, call it a+b$\sqrt{2}$, followed by insertion of (a+1)+b$\sqrt{2}$ and a+ (b+1)$\sqrt{2}$ to the collection. 

The operations on this collection are extract the minimum and insert. Since it is possible that the same number may be inserted more than once, we need to ensure the collection does not create duplicates when the same item is inserted twice. A BST satisfies these operations efficiently, and from the BST, which is 0+0 $\sqrt{2}$, and insert 1+ 0$
\sqrt{2}$ and 0 + 1 $\sqrt{2}$. We extract the minimum from BST, which is 1 + 0$\sqrt{2}$, and insert 2 + 0$\sqrt{2}$ and 1+1$\sqrt{2}$ to the BST... 

In [16]:
import math

In [17]:
class Number:
    def __init__(self,a,b):
        self.a, self.b = a, b
        self.val = a+ b*math.sqrt(2)
        
    def __lt__(self, other):
        return self.val < other.val
    
    def __eq__(self, other):
        return self.val == other.val
    
    
def generate_first_k_a_b_sqrt2(k:int) -> list:
    # Initial for 0 + 0 sqrt(2).
    candidates = bintrees.RBTree([(Number(0, 0), None)])
    
    result = []
    while len(result) < k:
        next_smallest = candidates.pop_min()[0]
        result.append(next_smallest.val)
        # Adds the next two numbers derived from next_smallest.
        candidates[Number(next_smallest.a +1, next_smallest.b)] = None
        candidates[Number(next_smallest.a, next_smallest.b + 1)] = None
    return result 

In each iteration we perform a deletion and two insertions. There are k such insertions, so the time complexity is O(k log k). The space complexity is O(k), since there are not more than 2k insertions. 

In [19]:
def generate_first_k_a_b_sqrt2_2(k : int) -> list:
    # Will store the first k numbers of the form a + b sqrt(2).
    cand = [Number(0,0)]
    i = j = 0
    for _ in range(1,k):
        cand_i_plus_1 = Number(cand[i].a+1,cand[i].b)
        cand_j_plus_sqrt2 = Number(cand[j].a, cand[j].b +1)
        cand.append(min(cand_i_plus_1, cand_j_plus_sqrt2))
        if cand_i_plus_1.val == cand[-1].val:
            i += 1
        if cand_j_plus_sqrt2.val == cand[-1].val:
            j += 1
    return [a.val for a in cand]
        

In [20]:
generate_first_k_a_b_sqrt2_2(5)

[0.0, 1.0, 1.4142135623730951, 2.0, 2.414213562373095]

In [21]:
generate_first_k_a_b_sqrt2_2(7)

[0.0, 1.0, 1.4142135623730951, 2.0, 2.414213562373095, 2.8284271247461903, 3.0]

In [53]:
def generate_first_k_a_b_sqrt2_3(k : int) -> list:
    cand = [Number(0,0)]
    j = 1
    t = 1
    while True:
        print(j)
        for i in range(j,-1,-1):
            print(i)
            cand.append(Number(i,j-i))
            t += 1
            if t >= k:
                return [a.val for a in cand]
        j = j+1
            
    return [a.val for a in cand]
        

In [54]:
generate_first_k_a_b_sqrt2_3(5)

1
1
0
2
2
1


[0.0, 1.0, 1.4142135623730951, 2.0, 2.414213562373095]

In [55]:
generate_first_k_a_b_sqrt2_3(7)

1
1
0
2
2
1
0
3
3


[0.0, 1.0, 1.4142135623730951, 2.0, 2.414213562373095, 2.8284271247461903, 3.0]

This approach is easier since 1 < $\sqrt{2}$. Then add the coefficient of 1 will also be smaller than add the coefficient of $\sqrt{2}$. We increasing the sum of coefficient. Descreases the coefficient of 1 while increases the coefficient of $\sqrt{2}$. Add to the candidates until reach the total number of k. 

Each additional element takes O(1) time to compute, implying an O(n) time complexity to compute the first n values of the form a + b $\sqrt{2}$. 

## 14.8 Build a minimum height BST from a sorted array

Given a sorted array, the number of BSTs that can be build on the entries in the array grows enormously with its size. Some of these trees are skewed, and are closer to lists; others are more balanced. 

How would you build a BST of minimum possible height from a sorted array? 

**Sol:** Intuitively, to make a minimum height BST, we want the subtrees to be as balanced as possible-- there is no point in one subtree being shorter than the other, since the height is determined by the taller one. More formally, balance can be achieved by keeping the number of nodes in both subtrees as close as possible. 

Let n be the length of the array. To achieve optimum balance we can make the element in the middle of the array, i.e., the [n/2]th entry, the root, and recursively compute minimum height BSTs for the subarrays on either side of this entry. 

As a concrete example, if the array is <2,3,5,7,11,13,17,19,23>, the root's key will be the middle element, i.e., 11. This implies the left subtree is to be build from <2,3,5,7>, and the right subtree is to be built from <13,17,19,23>. To make both of these minimum height, we call the procedure recursively. 

In [56]:
def build_min_height_bst_from_sorted_array(A : list) -> BstNode:
    def build_min_height_bst_from_sorted_subarray(start, end):
        if start >= end:
            return None
        mid = (start + end)//2
        return BstNode(A[mid],
                      build_min_height_bst_from_sorted_subarray(start, mid),
                      build_min_height_bst_from_sorted_subarray(mid+1 , end))
    
    return build_min_height_bst_from_sorted_subarray(0, len(A))

In [57]:
A = [2,3,5,7,11,13,17,19,23]
balanced_tree = build_min_height_bst_from_sorted_array(A)

In [58]:
tree_traversal_inorder(balanced_tree)

Inorder: 2
Inorder: 3
Inorder: 5
Inorder: 7
Inorder: 11
Inorder: 13
Inorder: 17
Inorder: 19
Inorder: 23


In [59]:
A = [1,2,3,4,5,6,7,8,9,10]

In [60]:
balanced_tree = build_min_height_bst_from_sorted_array(A)
tree_traversal_inorder(balanced_tree)

Inorder: 1
Inorder: 2
Inorder: 3
Inorder: 4
Inorder: 5
Inorder: 6
Inorder: 7
Inorder: 8
Inorder: 9
Inorder: 10


The time complexity T(n) satisfies the recurrence T(n) + 2T(n/2) + O(1), which solves to T(n) = O(n). Another explanation for the time complexity is that we make exactly n calls to the recursive function and spend O(1) within each call. 

## 14.9 Test if three BST nodes are totally ordered 

Write a program which takes two nodes in a BST and a third node, the "middle" node, and determines if one of the two nodes is a proper ancestor and the other a proper descendantof the middle. (A proper ancestor of a node is an ancestor that is not equal to the node; a proper descendant is defined similarly.)

**Sol:** A brutef-force solution would be check if the first node is a proper ancestor of the middle and the second node is a proper descendant of the middle. If this check returns true, we return true. Otherwise, we return the result of the same check, swapping the roles of the first and second nodes. 

Searching has time complexity O(h), where h is the height of the tree, since we can use the BST property to prune one of the two children at each node. Since we perform a maximum of three searches, the total time complexity is O(h). One disadvantage of trying the two input nodes for being the middle's ancestor one-after-another is that even when the three nodes are very close, e.g., if the two nodes are {A,J} and the middle node is I in Figure 14.1, if we begin the search for the middle from the lower of the nodes, e.g., from J, we incur the full O(h) complexity. 

We can prevent this by performing the searches for the middle from both alternatives in an interleaved fashiion. If we encounter the middle from one node, we subsequently search for the second node from the middle. This way we avoid performing an unsuccessful search on a large subtree. 


In [61]:
def part_includes_ancestor_and_descendant_of_m(possible_anc_or_desc_0: BstNode,
                                              possible_anc_or_desc_1: BstNode,
                                              middle: BstNode) -> bool:
    search_0, search_1 = possible_anc_or_desc_0, possible_anc_or_desc_1
    
    # Perform interleaved searchin from possible_anc_or_desc and 
    # possible_anc_or_desc for middle 
    while (search_0 is not possible_anc_or_desc_1 and search_0 is not middle
          and search_1 is not possible_anc_or_desc_0 
          and search_1 is not middle and (search_0 or search_1)):
        if search_0:
            search_0 = (search_0.left
                       if search_0.data > middle.data else search_1.right)
        if search_1:
            search_1 = (search_1.left
                       if search_0.data > middle.data else search_1.right)
            
    # If both searches were unsuccessful, or we got from 
    # possible_anc_or_desc_0 to possible_anc_or_desc_1 without seeing middle, 
    # or from possible_anc_desc_1 to possible_anc_or_desc_0 without seeing middle, 
    # middle cannot lie between possible_anc_or_desc_0 and possible_anc_or_desc_1.
    
    if ((search_0 is not middle and search_1 is not middle)
           or search_0 is possible_anc_or_desc_1
           or search_1 is possible_anc_or_desc_0):
        return False
    
    def search_target(source, target):
        while source and source is not target:
            source = source.left if source.data > target.data else source.right
        return source is target 
    
    # If we get here, we already know one of possible_anc_or_desc_0 or 
    # possible_anc_to_desc_1 has a path to middle. Check if middle has a path to 
    # possible_anc_to_desc_1 or to possible_anc_to_desc_0.
    return search_target(
        middle, possible_anc_or_desc_1
        if search_0 is middle else possible_anc_or_desc_0)

In [64]:
part_includes_ancestor_and_descendant_of_m(node3, node12, node10)

True

In [65]:
part_includes_ancestor_and_descendant_of_m(node12, node3, node10)

True

In [67]:
part_includes_ancestor_and_descendant_of_m(node11, node3, node12)

AttributeError: 'NoneType' object has no attribute 'data'

When the middle node does have an ancestor and descendant in the pair, the time complexity is O(d), where d is the difference between the depths of the ancestor and descendant. The reason is that the interleaved search will stop when the ancestor reaches the middle node, i.e., after O(d) iterations. The search from the middle node to the descendant then takes O(d) steps to succeed. When the middle node does not have an ancestor and descendant in the pair, the time complexity is O(h), which corresponds to a worst-case search in a BST. 

## 14.10 The range lookup problem 

Consider the problem of developing a web-service that takes a geographical location, and returns the nearest restaurant. The service starts with a set of restaurant locations-- each location includes X and Y--cordinates. A query consist of a location, and should return the nearest restaurant (ties can be broken arbitrarily). 

One approach is to build two BSTs on the restaurant locations: T_X sorted on the X coordinates, and T_Y sorted on the Y coordinates. A query on location (p,q) can be performed by finding all the restaurants whose X coordinate is in the interval[p-D,p+D], and all the restaurants whose Y coordinate is in the interval [q-D, q+D], taking the intersection of these two sets, and finding the restaurant in the intersection which is closest to (p,q). Heuristically, if D is chosen correctly, the subsets are small and a brute-force search for the closest point is fast. One approach is to start with a small value for D and keep doubling it until the final intersection in nonempty. 

Write a program that takes as input a BST and an interval and returns the BST keys that lie in the interval. For example, for the tree in Figure 14.1 on Page 212, and interval [16,31], you should return 17,19,23,29,31. 

**Sol:** By the property of BST, we have 
* If the root of the tree holds a key that is less than the left endpoint of the interval, the left subtree cannot contain any node whose key lies in the interval. 
* If the root of the tree holds a key that is greater than the right endpoint of the interval, the right subtree cannot contain any node whose key lies in the interval.
* Otherwise, the root of the tree holds a key that lies within the interval, and it is possible for both the left and right subtrees to contain nodes whose keys lie in the interval. 

In [68]:
import collections

In [69]:
Interval = collections.namedtuple('Interval', ('left', 'right'))

def range_lookup_in_bst(tree: BstNode, interval:Interval) -> list:
    def range_lookup_in_bst_helper(tree):
        if tree is None:
            return 
        
        if interval.left <= tree.data <= interval.right:
            # tree.data lies in the interval.
            range_lookup_in_bst_helper(tree.left)
            result.append(tree.data)
            range_lookup_in_bst_helper(tree.right)
            
        elif interval.left > tree.data:
            range_lookup_in_bst_helper(tree.right)
        
        elif interval.right < tree.data:
            range_lookup_in_bst_helper(tree.left)
            
    result = []
    range_lookup_in_bst_helper(tree)
    return result 

In [70]:
interval = Interval(16,31)

In [71]:
range_lookup_in_bst(node1,interval)

[17, 19, 23, 29, 31]

In [72]:
interval = Interval(16,42)
range_lookup_in_bst(node1,interval)

[17, 19, 23, 29, 31, 37, 41]

Looking carefully at the nodes we recurse on, we see these nodes can be partitioned into three subsets--nodes on the search path to 16, nodes on the search path to 42, and the rest. All nodes in the third subset must lie in the result, but some of the nodes in the first two subsets may or may not lie in the result. The traversal spends O(h) time visiting the first two subsets, and O(m) time traversing the third subset, where m is the number of keys that lie in the specified interval. Each edge is visited twice, once downwards, and once upwards? Total time complexity is O()

In [1]:
class BinaryTreeNode:
    def __init__(self, data=None, left= None, right = None):
        self.data = data 
        self.left = left 
        self.right = right 

In [2]:
def tree_traversal(root: BinaryTreeNode) -> None:
    if root:
        # Preorder: Processes the root before the traversals of left 
        # and right children 
        print('Preorder: %d' % root.data)
        tree_traversal(root.left)
        # Inorder: Processes the root after the traversal of left 
        # child and before the traversal of right child 
        print('Inorder: %d' % root.data)
        tree_traversal(root.right)
        # Postorder: Processes the root after the traversals of left 
        # right child 
        print ('Postorder: %d' % root.data)

# Leetcode Binary Search Problem 

## 35. Search Insert Position 

Given a sorted array and a target value, return the index if the target is found. If not, return the index where it would be if it were inserted in order. 

You may assume no duplicates in the array. 

**Example 1:**
Input: [1,3,5,6], 5
Output: 2

**Example 2:**
Input: [1,3,5,6], 2
Output: 1 

**Example 3:**
Input: [1,3,5,6], 7
Output: 4

**Example 4:**
Input: [1,3,5,6], 0
Output: 0

In [11]:
def searchInsert(nums:list, target: int) -> int:
    
    for i in range(len(nums)):
        if nums[i] >= target:
            return i 

    return (i+1)

In [12]:
nums = [1,3,5,6]
target = 5

In [13]:
searchInsert(nums, target)

2

In [14]:
target = 2

In [15]:
searchInsert(nums, target)

1

In [16]:
target = 7

searchInsert(nums, target)

4

In [17]:
target = 0

searchInsert(nums, target)

0

## 34. Find First and Last Position of Element in Sorted Array

Given an array of integers nums sorted in ascending order, find the starting and ending position of a given target value. 

Your algorithm's runtime complexity must be in order or O(log n). 

If the target is not found in array, return [-1,-1]

**Example 1:**
Input: nums = [5,7,7,8,8,10], target = 8
Output: [3,4]

**Example 2:**
Input: nums = [5,7,7,8,8,10], target = 6
Output: [-1, -1]

In [32]:
def searchRange(nums: list, target: int) -> list:
    
    
    def find_m(nums: list, target: int) -> int:

        ll = 0
        rr = len(nums) 


        while ll < rr :
            m = (ll+rr)//2

            if nums[m] > target:
                rr = m 
            elif nums[m] < target:
                ll = m + 1
        
            elif nums[m] == target:
                return m 
        return (-1)
    
    m =  find_m(nums, target)
    print(m)
    if m == -1:
        return [-1,-1]
    
    l = 0
    r = len(nums ) 
    
    while l < m:
        m1 = (l+m) //2
        
        if nums[m1] < target:
            l = m1 + 1
        elif nums[m1] == target:
            break
    
    while m < r:
        m2 = (r + m) //2
        
        if nums[m2] > target:
            r = m2
        elif nums[m2] == target:
            break 
            
    if m1 <= m and m2>= m:
        return [m1,m2]
    else: 
        return [-1, -1]
  

In [33]:
nums = [5,7,7,8,8,10] 
target = 8

searchRange(nums, target)

4


[3, 4]

In [34]:
target = 6

searchRange(nums, target)

-1


[-1, -1]

In [51]:
def searchRange_2(nums: list, target: int) -> int:
    
    def lower_bound(A: list, target: int) -> int:
        
        l = 0
        r = len(A) 
        while l < r:
            m = l + (r-l)//2
            if A[m] >= target:
                r = m
            else:
                l = m+1
        return l 
    
    def upper_bound(A: list, target: int) -> int:
        l = 0
        r = len(A)
        
        while l< r:
            m = l +(r-l)//2
            if A[m] > target:
                r = m 
            else:
                l = m + 1
        return l
    
    lw = lower_bound(nums, target)
    
    up = upper_bound(nums, target)
    
    if lw == up:
        return [-1, -1]
    else:
        return [lw, up-1]
    
    
    
    print(lw, up)

In [52]:
nums = [5,7,7,8,8,10] 
target = 6

searchRange_2(nums, target)

[-1, -1]

In [53]:
target = 8

searchRange_2(nums, target)

[3, 4]

In [54]:
nums = [2,2]
target = 2
searchRange_2(nums, target)

[0, 1]

In [55]:
nums = [1]
target = 1
searchRange_2(nums, target)

[0, 0]

In [56]:
nums = [1]
target = 0
searchRange_2(nums, target)

[-1, -1]

Time Complexity: O(log(n))

Space Complexity: O(1)

## 64. Sqrt(x)

Implement int sqrt(int x). 

Compute and return the square root of x, where x is guaranteed to be a non-negative integer. 

In [57]:
def mySqrt(x: int) -> int:
    l = 0
    r = x+1
    while l< r:
        m = l + (r-l)//2
        if m * m > x:
            r = m 
        else:
            l = m +1
    return l -1 

Time Complexity: O(log n )

Space Complexity: O(1)

In [58]:
mySqrt(4)

2

In [59]:
mySqrt(8)

2

In [60]:
mySqrt(10)

3

## 278. First Bad Version 

You are a product manager and currently leading a team to develop a new product. Unfortunately, the latest version of your product fails the quality check. Since each version is developed based on the previous version, all the versions after a bad version are also bad. 

Suppose you have n versions [1,2,...,n] and you want to find out the first bad one, which causes all the following ones to be bad. 

You are given an API bool isBadversion(version) which will return whether version is bad. Implement a function to find the first bad version. You should minimize the number of calls to the API. 

In [61]:
class Solution:
    def firstBadVersion(self, n):
        """
        :type n: int
        :rtype: int
        """
        
        l = 0
        r = n 
        while l < r:
            m = l+ (r-l)//2
            if isBadVersion(m):
                r = m
            else:
                l = m+1 
        return l 
        

## 875. Koko Eating Bananas

Koko loves to eat bananas. There are N piles of bananas, the i-th pile has pile[i] bananas. The guards have gone and will come back in H hours. 

Koko can decide her bananas-per-hour eating speed of K. Each hour, she chooses some pile of bananas, and eats K bananas from that pile. If the pile has less than K bananas, she eats all of them instead, wnad won't eat any more bananas during this hour. 

Koko likes to eat slowly, but still wants to finish eating all the bananas before the guards come back.

Return the minimum integer K such that she can eat all the bananas with H hours. 

**Example 1:**
Input: piles = [3,6,7,11], H = 8

Output: 4

Explanation: 3/4 =1 + 6/4 = 2 + 7/4 = 2 + 11/4 = 3 =8 

In [67]:
(6 + 4 -1 )//4

2

In [70]:
def minEatingSpeed(piles: list, H: int) -> int:
    
    l = 1
    r = max(piles) + 1
    
    while l < r:
        m = l+ (r-l)//2
        h = 0
        for p in piles:
            h += (p+ m- 1)//m
        if h <= H:
            r = m 
        else: 
            l = m+1
    return l 
    
    

In [72]:
piles = [3,6,7,11]
H = 8
minEatingSpeed(piles, H)

4

In [73]:
piles = [30, 11, 23, 4, 20]
H = 5
minEatingSpeed(piles, H)

30

In [74]:
piles = [30, 11, 23, 4, 20]
H = 6
minEatingSpeed(piles, H)

23

## 378. Kth Smallest Element in a Sorted Matrix 

Given a n $\times$ n matric where each of the rows and columns are sorted in ascending order, find the kth smallest element in the matrix. 

Note that it is the kth smallest element in the sorted order, not the kth distinct elemnt.

**Example:**
matrix = [ 
[1, 5, 9], 
[10, 11, 13],
[12, 13, 15]
]
k = 8

return 13. 

In [11]:
def kthSmallest(matrix: list, n : int) -> int:
    
    l = matrix[0][0]
    r = matrix[-1][-1]
    
    while l < r:
        m = l + (r-l)//2
        total = 0
        for row in matrix:
            total += upper_bound(row, m)
        if total >= n:
            r = m
        else:
            l = m+1 
    return l

def upper_bound(nums: list, n: int) -> int:
    l = 0
    r = len(nums)
    
    while l< r:
        m = l+ (r-l)//2
        if nums[m] > n:
            r = m 
        else:
            l = m +1 
            
    return l 

In [12]:
matrix = [[1,5,9], [10,11,13], [12,13,15]]

kthSmallest(matrix, 8)

13