This week we will implement some classic divide and conquer algorithms and
discuss the problems on HW 4.

## Binary search

#### Recall

We can determine in $O(\log n)$ time whether a given element $x$ is in a
sorted array $A.$ To do this, we compare the middle element of $A$ with
$x,$ and because $A$ is sorted, this eliminates at least one half of $A.$
For example, if the middle element is larger than $x,$ then $x$ cannot be in
the right half of $A,$ and if the middle element is equal to $x,$ then we are
already done.

In [None]:
def binary_search(A, x):
    '''
    Whether the element `x` is in the sorted array `A`

    `A` must be sorted in non-decreasing order (duplicates are allowed).
    Every recursive call discards at least half of the remaining subarray,
    so only O(log n) elements of `A` are ever compared with `x`.
    '''
    def binary_search_helper(i, j):
        '''
        Whether the element `x` is in the subarray A[i: j]
        '''
        if i == j: # the subarray is empty
            return False
        elif j == i + 1: # the subarray has only one element
            return A[i] == x
        else:
            middle = (i + j) // 2
            if A[middle] == x:
                return True
            elif A[middle] < x:
                # everything in A[i: middle + 1] is <= A[middle] < x
                return binary_search_helper(middle + 1, j)
            else: # A[middle] > x
                # everything in A[middle: j] is >= A[middle] > x
                return binary_search_helper(i, middle)
    return binary_search_helper(0, len(A))

A = [2, 2, 4, 4, 6, 6, 6, 7, 8, 10, 11, 13]
# every distinct value stored in A has to be found
assert all(
    binary_search(A, value) for value in (2, 4, 6, 7, 8, 10, 11, 13)
)
# values below, in the gaps of, and above A must not be found
assert not any(
    binary_search(A, value) for value in (1, 3, 5, 9, 12, 14)
)

#### HW 4 Problem 1

In this problem, we are given an array $A$ of integers that is sorted in strictly
increasing order, i.e. $A[i] < A[i + 1]$ for all relevant $i,$ and we want
to determine whether there exists a fixed point, i.e. some $i$ such that
$A[i] = i.$ We will index starting from $0.$

The following algorithm works, but it is $O(n).$ In discussion section, we will
discuss how we are led to this algorithm, why it is $O(n),$ and how it can be
improved to $O(\log n).$

In [23]:
def fixed_point(A):
    '''
    Whether `A` has a fixed point, i.e. an index `i` (starting from 0) such
    that `A[i] == i`.

    The offsets `A[i] - i` are zero exactly at the fixed points, so the list
    of offsets is searched for 0 with `binary_search`. Building that list
    visits every element of `A`, which is what makes this O(n). This assumes
    the offsets come out sorted, as they do when `A` holds strictly
    increasing integers.
    '''
    offsets = [value - index for index, value in enumerate(A)]
    return binary_search(offsets, 0)

# arrays that contain a fixed point
assert all(
    fixed_point(array)
    for array in (
        [-3, -2, -1, 2, 4, 6, 7], # 4 is a fixed point
        [-7, -5, -3, -1, 1, 3, 5, 7], # 7 is a fixed point
    )
)
# arrays without any fixed point
assert not any(
    fixed_point(array)
    for array in (
        [-1, 0, 1, 2, 3, 4],
        [-8, -7, 4, 5],
    )
)

## Merge sort

#### Recall

We can sort a list in $O(n \log n)$ time, by recursively sorting the left and
right halves and combining the sorted halves in linear (with respect to the
length of the halves) time.

In [None]:
def merged(A, B):
    '''
    A new sorted list containing every element of the sorted lists `A` and
    `B`. Neither input is modified.

    The merge is stable: when an element of `A` and an element of `B` compare
    equal, the one from `A` is placed first. Each element is appended exactly
    once, so the running time is linear in `len(A) + len(B)`.
    '''
    i = j = 0
    ans = []
    while i < len(A) and j < len(B):
        if B[j] < A[i]:
            ans.append(B[j])
            j += 1
        else: # A[i] <= B[j]; on ties the element of `A` goes first
            ans.append(A[i])
            i += 1
    # At most one of the inputs still has elements left, and all of them are
    # at least as large as everything already in `ans`.
    ans.extend(A[i:])
    ans.extend(B[j:])
    return ans

# (left input, right input, expected merge)
assert all(
    merged(left, right) == expected
    for left, right, expected in [
        (
            [1, 5, 7, 8, 8],
            [0, 1, 1, 3, 5],
            [0, 1, 1, 1, 3, 5, 5, 7, 8, 8],
        ),
        ([0, 1, 2], [0, 1, 2], [0, 0, 1, 1, 2, 2]),
        ([1, 3, 5], [2, 4, 6], [1, 2, 3, 4, 5, 6]),
        ([0], [0], [0, 0]),
        ([], [], []),
    ]
)


def merge_sorted(A):
    '''
    A new sorted list with the elements of `A`. `A` itself is not modified.

    Lists with at most one element are already sorted. Longer lists are split
    in the middle, each half is sorted recursively, and the two sorted halves
    are combined with `merged`.
    '''
    if len(A) <= 1:
        # Return a copy so that the result never aliases the input, just like
        # the freshly built list returned for longer inputs.
        return list(A)
    middle = len(A) // 2
    return merged(merge_sorted(A[: middle]), merge_sorted(A[middle :]))
    
# merge_sorted has to agree with the built-in sort on each of these lists
for A in (
    [7, 5, 3, 1, -1, 3, 6, 8, -2, -5, -5, -3, 2, 1],
    [9, 8, 7, 6, 5, 4, 3, 2, 1],
    [3, 2, 1],
):
    assert merge_sorted(A) == sorted(A)

#### HW 4 Problem 3

In this problem, we are given a list $L,$ and we want to count the number of
special inversions, i.e. $i, j$ such that $i < j$ and $L[i] > 2L[j].$

In discussion section, we will discuss the solution to this problem and
implement it by filling in the blanks in the following code.

In [None]:
def n_crossing_special_inversions(A, B):
    '''
    Assuming `A` and `B` are respectively the sorted left and right halves of
    an array `M`, returns the number of special inversions `i, j` in `M` where
    `i` is an index in the left half and `j` is an index in the right half.

    Equivalently, this is the number of pairs `(a, b)` with `a` taken from
    `A`, `b` taken from `B` and `a > 2 * b`. Both lists are scanned once, so
    the running time is linear in `len(A) + len(B)`.
    '''
    total = 0
    # `n_smaller` is the number of leading elements `b` of `B` that satisfy
    # `a > 2 * b` for the current `a`. Because `A` is sorted, those elements
    # also qualify for every later `a`, so the pointer never moves back.
    n_smaller = 0
    for a in A:
        while n_smaller < len(B) and a > 2 * B[n_smaller]:
            n_smaller += 1
        total += n_smaller
    return total

# (left half, right half, crossing special inversions contributed by each
# element of the left half)
assert all(
    n_crossing_special_inversions(left, right) == sum(per_left_element)
    for left, right, per_left_element in [
        ([3, 4, 6], [0, 1, 2], [2, 2, 3]),
        (
            [2, 3, 5, 7, 9, 10],
            [0, 0, 2, 3, 3, 4],
            [2, 2, 3, 5, 6, 6],
        ),
        (
            [0, 1, 1, 2, 3, 4],
            [2, 2, 3, 4, 4, 5],
            [0, 0, 0, 0, 0, 0],
        ),
    ]
)


def n_special_inversions(L):
    '''
    The number of special inversions in L, i.e. the number of pairs of
    indices `i < j` such that `L[i] > 2 * L[j]`. `L` is not modified.
    '''
    from heapq import merge

    def n_special_inversions_helper(M):
        '''
        A tuple consisting of the number of special inversions in M and M
        sorted.
        '''
        if len(M) <= 1: # no pair of indices exists
            return 0, list(M)
        middle = len(M) // 2
        n_left, left = n_special_inversions_helper(M[: middle])
        n_right, right = n_special_inversions_helper(M[middle :])
        # Count the pairs with one index in each half. Sorting the halves
        # does not change this count, because every index of the left half
        # still precedes every index of the right half. As `left` is sorted,
        # the elements of `right` that qualify for one element of `left`
        # also qualify for all later ones, so `n_smaller` only moves forward.
        n_crossing = 0
        n_smaller = 0
        for a in left:
            while n_smaller < len(right) and a > 2 * right[n_smaller]:
                n_smaller += 1
            n_crossing += n_smaller
        return n_left + n_right + n_crossing, list(merge(left, right))
    return n_special_inversions_helper(L)[0]

# (list, expected number of special inversions)
assert all(
    n_special_inversions(values) == expected
    for values, expected in [
        ([5, 3, 1], 2),
        ([6, 4, 3, 2, 7, 5, 1], 6),
        ([7, 6, 5, 4, 3, 2, 1], 9),
        ([0, 1, 2, 3, 4, 5], 0),
    ]
)

## Miscellaneous divide and conquer

#### HW 4 Problem 2

In this problem, we are given an array $L$ of length $n$ and want to find the elements of $L$ that occur more than $n/4$ times.

In discussion section we will discuss why the following divide and conquer
algorithm is $O(n \log n).$

In [None]:
def count(L, x):
    '''
    The number of occurrences of `x` in `L`, in O(n) time.

    Every element of `L` is compared with `x` exactly once.
    '''
    return sum(1 for item in L if item == x)

def unique_elements(C):
    '''
    The unique elements in `C`, returned as a sorted list.

    Each of the n elements of `C` is looked up in the list of the (at most k)
    unique elements found so far, which takes O(n * k) time, and those k
    elements are sorted at the end. One can probably also divide and conquer
    this to make it more efficient, but for our purposes this is not the
    bottleneck.
    '''
    seen = []
    for item in C:
        if item in seen: # linear in the length of seen
            continue
        seen.append(item)
    seen.sort()
    return seen

def frequent_elements(L, p):
    '''
    The elements in `L` that occur more than `p * len(L)` times. Here `p` is
    a real number such that `0 <= p < 1`.

    The result is a new list without duplicates; for lists with more than one
    element it comes from `unique_elements` and is therefore sorted. `L` is
    not modified.
    '''
    if len(L) <= 1:
        # Since p < 1, the only element of a one-element list occurs more
        # than p * 1 times. Copy so that the result never aliases the input.
        return list(L)
    middle = len(L) // 2
    # An element occurring more than p * len(L) times in `L` must occur more
    # than p times the length of at least one of the two halves, so the
    # frequent elements of the halves are the only candidates.
    candidates = (
        frequent_elements(L[: middle], p) + frequent_elements(L[middle :], p)
    )
    threshold = p * len(L)
    return unique_elements([x for x in candidates if count(L, x) > threshold])
    
# (list, p, expected frequent elements)
assert all(
    frequent_elements(values, p) == expected
    for values, p, expected in [
        ([3, 3, 5, 5, 6], 0, [3, 5, 6]),
        ([3, 3, 5, 5, 6], 1 / 3, [3, 5]),
        ([3, 3, 5, 5, 6], 1 / 2, []),
        ([3, 3, 5, 5, 6], 2 / 5, []),
        ([2, 5, 7, 8, 7, 5, 2, 5, 7, 7, 2, 8], 1 / 4, [7]),
        ([1, 1, 2, 2], 1 / 4, [1, 2]),
        ([1, 2, 3, 4], 1 / 4, []),
        ([1, 2, 3, 4], 1 / 5, [1, 2, 3, 4]),
    ]
)
