*** Algorithms is about finding structures in your data and making use of them. ***

# Array

## Patterns of Array

### search 
- for single, or group of, or beginning of certain elements 
- usually involves extended binary search (keep looking even after you found one)
- with help of data strucutures such as heap, bst

### pairs
- pairs of constraints, e.g., sum, diff equal to k
    - method 1: using hashing
    - method 2: sort the array, use `two pointers approach` (invariants)
        - for sum (or symmetry in pairs), use head-tail pointers
        - for diff (or asymmetry in pairs), use head-head pointers (equivalent to negating the array first)
- pairs of min/max
    - usually it is starightforward enough after sorting the array
 
### subarrays - solutions usually depend on the representations
- represented as arbitrary gap between i, and j (e.g., sum of subarray closest to 0)
- represented as two pointers (for min/max), one is fast one is slow, the optimal subarray solution is usually represented as the gap of the two pointers, e.g., (max sum of subarray)
- represented as two pointers (for csp), right is moving first to meet the constraint, and left moves to form another solution - kinda like backtracking, but be careful when the cumsum is not strictly increasing, i.e., not all elements are positive

## Find all pairs from two sorted array where the sum is k
- method 1: iterate one array and bin-search in the other, nlog(m) complexity
- method 2: hashing, always think of hashing for arrays O(n) + O(m)

In [1]:
xs1 = [1, 3, 5, 7]
xs2 = [2, 3, 5, 8]
k = 10

In [2]:
## method 1 binary search by taking advantage of sorted array
## nlog(m) complexity

import bisect
def found(xs, x):
    i = bisect.bisect_left(xs, x)
    return i < len(xs) and xs[i] == x

def sum_pairs(xs1, xs2, k):
    result = set()
    for x1 in xs1:
        x2 = k - x1
        if found(xs2, x2):
            result.add((x1, x2))
    return result
        

assert sum_pairs(xs1, xs2, k) == {(5, 5), (7, 3)}

In [3]:
## method 2 - hashing 
def sum_pairs(xs1, xs2, k):
    h1 = set()
    result = set()
    for x1 in xs1:
        h1.add(x1)
    for x2 in xs2:
        if (k-x2) in h1:
            result.add( (k-x2, x2) )
    return result

assert sum_pairs(xs1, xs2, k) == {(5, 5), (7, 3)}

## Find k numbers with most occurences in the given array
- Use a hash to count the occurances and then use a max heap to go through the hash - return the larger number when frequence calls a tie
- O(n) complexity

In [41]:
xs = [3, 1, 4, 4, 5, 2, 6, 1, 3]
k = 2

from collections import defaultdict
import heapq

def top_k(xs, k):
    counts = defaultdict(lambda : 0)
    for x in xs:
        counts[x] += 1
    counts = iter(counts.items())
    heap = []
    for _ in range(k):
        x, c = next(counts)
        heap.append((c, x))
    heapq.heapify(heap)
    for x, c in counts:
        heapq.heappushpop(heap, (c, x))
    return [x for c,x in heapq.nlargest(k, heap)]

assert top_k(xs, k) == [4, 3]

## find the subarray of an unsorted array, which has sum closest to 0

In [1]:
def zero_sub(xs):
    accum_sum = []
    s = 0
    for i, x in enumerate(xs):
        s += x
        accum_sum.append((i, s))
    accum_sum = sorted(accum_sum, key=lambda kv: kv[1])
#     print(accum_sum)
    start, end = 0, 0
    min_sum = abs(sum(xs))
    for i in range(1, len(accum_sum)):
        i1, s1 = accum_sum[i-1]
        i2, s2 = accum_sum[i]
        s = s2 - s1 if i1 < i2 else s1 - s2
        if abs(s) < abs(min_sum):
            min_sum = s
            start, end = min(i1, i2), max(i1, i2)
    return (start+1, end, min_sum)

xs = [-1, 3, 2, -5, 4]
print(zero_sub(xs))

xs = [2, -5, 4, -6, 3]
print(zero_sub(xs))

(1, 3, 0)
(2, 4, 1)


In [None]:
# two pointers approach
def close_zero_sub(xs):
    pass

xs = [-1, 3, 2, -5, 4]
print(close_zero_sub(xs))

xs = [2, -5, 4, -6, 3]
print(close_zero_sub(xs))

## for an array of distinct elements, find all triplets whose product is equal to given constant


In [3]:
xs = [1, 4, 6, 2, 3, 8]
m = 24


def find_triplets(xs, m):
    h = {x: i for i, x in enumerate(xs)}
    r = set()
    for i1 in range(len(xs)):
        for i2 in range(i1+1, len(xs)):
            p = xs[i1] * xs[i2]
            if p != 0 and m >= p and m % p == 0:
                x3 = m // p
                if x3 in h and h[x3] != i2 and h[x3] != i1:
                    r.add(tuple(sorted([xs[i1], xs[i2], x3])))
    return len(r)

xs = [1, 4, 6, 2, 3, 8]
m = 24
assert find_triplets(xs, m) == 3

xs = [0, 4, 6, 2, 3, 8]
m = 18
assert find_triplets(xs, m) == 0

## find a pair of elements, each from two different arrays, by swapping them, we get the same sum for the two arrays

In [4]:
def swap_to_equal(xs, ys):
    hys = set(ys)
    sum_xs = sum(xs)
    sum_ys = sum(ys)
    target_sum = (sum_xs + sum_ys) // 2
    change_for_xs = target_sum - sum_xs
    result = set()
    for x in xs:
        y = change_for_xs + x
        if y in hys:
            result.add((x, y))
    return result

xs = [4, 1, 2, 1, 1, 2]
ys = [1, 6, 3, 3]
print(swap_to_equal(xs, ys))

xs = [5, 7, 4, 6]
ys = [1, 2, 3, 8]
print(swap_to_equal(xs, ys))

{(2, 3)}
{(5, 1), (6, 2), (7, 3)}


In [9]:
## it is an variant of two pointers approach on pair difference
def swap_to_equal(xs, ys):
    xs = sorted(xs)
    ys = sorted(ys)
    sum_xs = sum(xs)
    sum_ys = sum(ys)
    ix = iy = 0
    result = set()
    while (ix < len(xs) and iy < len(ys)):
        x, y = xs[ix], ys[iy]
        if sum_xs - x + y < sum_ys - y + x:
            iy += 1
        elif sum_xs - x + y > sum_ys - y + x:
            ix += 1
        else:
            result.add( (x, y) )
            ix += 1
            iy += 1
    return result

xs = [4, 1, 2, 1, 1, 2]
ys = [1, 6, 3, 3]
print(swap_to_equal(xs, ys))

xs = [5, 7, 4, 6]
ys = [1, 2, 3, 8]
print(swap_to_equal(xs, ys))

{(2, 3)}
{(5, 1), (6, 2), (7, 3)}


## find the pair of elements, each from one of two array, whose sum is equal to a given k
- you can use hash
- or use two pointers - popular with all CSP problem that has a partial order

In [9]:
## two pointers solution

def foo(xs, ys, k):
    xs = sorted(xs)
    ys = sorted(ys)
    ix, iy = 0, len(ys)-1
    result = set()
    while ix < len(xs) and iy > 0:
        x, y = xs[ix], ys[iy]
        if x + y < k:
            ix += 1
        elif x + y > k:
            iy -= 1
        else:
            result.add((x, y))
            ix += 1
            iy -= 1
    return result

xs = [1, 3, 5, 7]
ys = [2, 3, 5, 8]
k = 10
print(foo(xs, ys, k))

xs = [1, 2, 3, 4, 5, 7, 11]
ys = [2, 3, 4, 5, 6, 8, 12]
k = 9
print(foo(xs, ys, k))

{(7, 3), (5, 5)}
{(4, 5), (5, 4), (1, 8), (3, 6)}


## find subarrays that have the same set of elements with the original one

In [1]:
from collections import defaultdict
def sub_of_unique(xs):
    n_unique = len(set(xs))
    n = len(xs)
    
    result = []
    window = {}
    start, end = 0, 0
    while start < n:
        while end < n and len(window) < n_unique:
            x = xs[end]
            window[x] = window.get(x, 0) + 1
            end += 1
        if len(window) == n_unique:
            for post_end in range(end-1, n):
                result.append((start, post_end))
        window[xs[start]] -= 1
        if window[xs[start]] == 0:
            del window[xs[start]]
        start += 1
    return result

xs = [2, 1, 3, 2, 3] # 5
print(sub_of_unique(xs))
xs = [2, 4, 5, 2, 1] # 2
print(sub_of_unique(xs))
xs = [2, 4, 4, 2, 4] # 9
print(sub_of_unique(xs))

[(0, 2), (0, 3), (0, 4), (1, 3), (1, 4)]
[(0, 4), (1, 4)]
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


## find the pair of elements, each from one of two arrays, whose abs-diff is equal to k. One element in an array can only be used zero or once
- Another application of "invariants" (or two pointers)
- firtly, the two pointers are not one from beginning, the other from the back - because we want to make the pair as close as possible
- the proof is similiar though: at a moment, anything on the left of the pivots should not be able to be a solution. 

In [21]:
def find_pairs(xs, ys, k):
    xs = sorted(xs)
    ys = sorted(ys)
    ix, iy = 0, 0
    
    pairs = []
    while ix < len(xs) and iy < len(ys):
        x, y = xs[ix], ys[iy]
        if abs(x-y) <= k:
            pairs.append((x, y))
            ix += 1
            iy += 1
        elif x < y:
            ix += 1
        else:
            iy += 1
    return pairs

xs = [3, 4, 5, 2, 1]
ys = [6, 5, 4, 7]
k = 3

print(find_pairs(xs, ys, k))

xs = [10, 15, 20]
ys = [17, 12, 24]
k = 3

print(find_pairs(xs, ys, k))

[(1, 4), (2, 5), (3, 6), (4, 7)]
[(10, 12), (15, 17)]


## Given an array having N integers, find the contiguous subarray having sum as great as possible,, but not greater than M.
- https://www.quora.com/Given-an-array-of-integers-A-and-an-integer-k-find-a-subarray-that-contains-the-largest-sum-subject-to-a-constraint-that-the-sum-is-less-than-k
- O(nlogn)

In [3]:
import bisect
def best_subarray(xs, k):
    cumsums = [0]
    cum = 0
    best = -float("inf")
    for x in xs:
        cum += x
        
        i = bisect.bisect_left(cumsums, cum-k)
        # if use bisect_right, when the last element is equal, it will return len(xs)
        
        if i != len(cumsums):
#             print(cumsums, cum, cumsums[i], best)
            best = max(cum-cumsums[i], best)
        bisect.insort(cumsums, cum)
        
    return best
        
    
    
xs = [2, 2, -1]
k = 0
print(best_subarray(xs, k))

xs = [2, 2, -1]
k = 3
print(best_subarray(xs, k))

xs = [2, 2, -2, 5, 1]
k = 3
print(best_subarray(xs, k))

-1
3
3


In [89]:
bisect.bisect_right([1, 2], 2), bisect.bisect_left([1, 2], 2)

(2, 1)

In [1]:
## two pointers solution
## if the elements of array is strictly positive
## the cum sum will be strictly increasing. 
## so we can use the two pointers approach
## it is O(n) algorithm as the worst case is 
## to move left and right to the end of array

def best_subarray(xs, k):
    left = right = 0
    cum_sum = 0
    best = 0
    while left < len(xs):
        while right < len(xs) and cum_sum + xs[right+1] <= k:
            right += 1
            cum_sum += xs[right]
        if cum_sum + xs[right] <= k:
            best = max(best, cum_sum + xs[right])
        left += 1
        cum_sum -= xs[left-1]
    return best

In [6]:
    
# xs = [2, 2, -1]
# k = 0
# print(best_subarray(xs, k))

xs = [2, 2, -1]
k = 3
print(best_subarray(xs, k))

xs = [2, 2, -2, 5, 1]
k = 3
print(best_subarray(xs, k))

3
3


## Arrays Apple Stocks
www.interviewcake.com Charlie Parks

I have an array stock_prices_yesterday where:
The indices are the time, as a number of minutes past trade opening time, which was 9:30am local time.
The values are the price of Apple stock at that time, in dollars.
For example, the stock cost $500 at 10:30am, so stock_prices_yesterday[60] = 500.
Write an efficient algorithm for computing the best profit I could have made from 1 purchase and 1 sale of 1 Apple stock yesterday. For this problem, we won't allow "shorting"—you must buy before you sell.

In [14]:
def best_trade(stock_prices):
    """
    Inputs: stock_prices, an array of stock prices at increasing time
    Outputs:
        - when to buy
        - when to sell
        - profit
    such that profit is maximized
    """
    buy, sell, profit = 0, 0, float('-inf')
    min_price, max_price = float('inf'), float('-inf')
    for i, price in enumerate(stock_prices):
        if (price < min_price):
            min_price = price
            buy = i
        if (price > max_price):
            max_price = price
            sell = i
    profit = stock_prices[sell] - stock_prices[buy]
    return (buy, sell, profit)

def test():
    stock_prices = [5, 2, 1, 2, 3, 6, 7, 11, 5, 3]
    buy, sell, profit = best_trade(stock_prices)
    assert (buy, sell, profit) == (2, 7, 10), (buy, sell, profit)
    
test()

## Remove Numbers in Array

codercareer.blogspot.com Charlie Parks

Given an array and a value, how to implement a function to remove all instances of that value in place and return the new length?

In [18]:
def remove(xs, x):
    """
    Input:
        - xs: array
        - x: element to remove
    Output:
        - array after x removed
    Should use O(1) space (in-place)
    """
    src, dst = 0, 0
    while src < len(xs):
        if (src != dst):
            xs[src], xs[dst] = xs[dst], xs[src]
        src += 1
        if xs[dst] != x:
            dst += 1
    return xs[:dst]

def test():
    xs, x = [4, 3, 2, 1, 2, 3, 6], 2
    assert remove(xs, x) == [4, 3, 1, 3, 6], remove(xs, x) 
    
test()

## remove the duplicates
Given an array, remove the duplicates and return a unique array keeping the first occurrence of the duplicates and the order.

In [25]:
def remove_dup(xs):
    """
    Inputs: xs array
    Output: ys: array with duplicated elements removed, order kept
    """
    h = set()
    src, dst = 0, 0
    while src < len(xs):
        if src != dst:
            xs[src], xs[dst] = xs[dst], xs[src]
        src += 1
        if xs[dst] not in h:
            h.add(xs[dst])
            dst += 1
        else:
            pass
    return xs[:dst]

def test():
    xs = [2, 1, 1, 1, 2, 3]
    assert remove_dup(xs) == [2, 1, 3], remove_dup(xs)
    
test()

# BST
- it is all about searching and taking advantage of the property (bounds) of trees

## find the element in a BST, which is upper bounded by k

In [10]:
#    5
#  2    12
# 1 3  9  21
#       19 25

from collections import namedtuple
T = namedtuple("T", ("d", "l", "r"))

tree = T(5,
  T(2, 
    T(1, None, None),
    T(3, None, None)),
  T(12,
    T(9, None, None),
    T(21,
      T(19, None, None),
      T(25, None, None))))



def find_greatest_upper_bound_by(tree, upper):
    if tree is None:
        return float('-inf')
    else:
        if tree.d == upper:
            return tree.d
        elif tree.d < upper:
            return max(tree.d, find_greatest_upper_bound_by(tree.r, upper))
        else: # tree.d > upper
            return find_greatest_upper_bound_by(tree.l, upper)
        
upper = 24 # -> 21
assert find_greatest_upper_bound_by(tree, upper) == 21
upper = 4 # -> 3
assert find_greatest_upper_bound_by(tree, upper) == 3

upper = 0 # -> -inf
assert find_greatest_upper_bound_by(tree, upper) == float('-inf'), find_greatest_upper_bound_by(tree, upper)

## merge n sorted arrays

In [38]:
xs1 = iter([1, 3, 5, 7])
xs2 = iter([2, 4, 6, 8])
xs3 = iter([0, 9, 10, 11])

streams = [xs1, xs2, xs3]
n = len(streams)

import heapq

minheap = [next(s) for s in streams]
heapq.heapify(minheap)

## return # of even-number branch node
- the value of the node is even number
- the node is a branch node (rather than a leaf)
- it is just the traverse of the tree
```
          +---+
          | 2 |
          +---+
         /     \
     +---+     +---+
     | 8 |     | 1 |
     +---+     +---+
    /         /     \
+---+     +---+     +---+
| 0 |     | 7 |     | 6 |
+---+     +---+     +---+
         /               \
     +---+               +---+
     | 4 |               | 9 |
     +---+               +---+
```
returns 3 (2, 8, 6)

In [49]:
from collections import namedtuple

TreeNode = namedtuple("TreeNode", ("data", "left", "right"))
def Node(data, left=None, right=None):
    return TreeNode(data, left, right)

def even_branch_node(tree):
    """
    Input: 
        - tree: bst of ints
    Output:
        - number of non-leaf nodes whose elements are even
    O(n)
    """
    if tree is None:
        return 0
    elif tree.left is None and tree.right is None:
        return 0
    else:
        return (tree.data % 2 == 0) + even_branch_node(tree.left) + even_branch_node(tree.right)
    
def test():
    tree = Node(2,
               Node(8,
                   Node(0),
                   None),
               Node(1,
                   Node(7,
                       Node(4),
                       None),
                   Node(6,
                       None,
                       Node(9)))
            )
    result = even_branch_node(tree)
    assert result == 3, result
    
test()

# Linked List

## insertion sort of doubly linked list

In [19]:
class Node:
    def __init__(self, data, prev=None, next=None):
        self.data = data
        self.prev = prev
        self.next = next
    def __repr__(self):
        return "{}".format(self.data)
    
def make_ddl(nodes):
    for i in range(1, len(nodes)):
        nodes[i-1].next = nodes[i]
        nodes[i].prev = nodes[i-1]
    
    ddl = nodes[0]
    return ddl

def print_list(head):
    while head is not None:
        print(head, end=" ")
        head = head.next

In [20]:

        
nodes = [Node(2), Node(5), Node(7), Node(3), Node(-1)]
head = make_ddl(nodes)



def insertion_sort(head):
    p = head.next
    while p != None:
        p_next = p.next # housekeeping
        q = head
        while p_next is not None and q != p_next.prev:
            if q.data >= p.data: break
            q = q.next
#         print(p)
#         print(q.prev, q, q.next)
        if p != q:
            p.next = q
            p.prev = q.prev
            if q.prev:
                q.prev.next = p
            else:
                head = p
            q.prev = p
#             print("=>", p.prev, p, p.next)
#             print("=>", q.prev, q, q.next)
        p = p_next
    return head

In [None]:
## BUGS HERE 
s = insertion_sort(head)
print_list(s)

## delete adjancent duplicate nodes from  dll
- e.g., 10 <-> 8 <-> 4 <-> 4 <-> 8 <-> 5

In [24]:
nodes = [Node(i) for i in [10, 8, 4, 4, 8, 5]]
ddl = make_ddl(nodes)
print_list(ddl)

10 8 4 4 8 5 

In [33]:
def remove_adjancent_duplicates(ddl):
    p = ddl
    while p.next != None:
        q = p.next
        if p.data == q.data:
            p.next = q.next
            q.next.prev = p
        else:
            p = p.next
    return ddl

In [35]:
new_ddl = remove_adjancent_duplicates(ddl)
print_list(new_ddl)

10 8 4 8 5 

# Trie
- It is important to have a convention for leaf node

## simple implementation of Trie in python
- https://stackoverflow.com/questions/11015320/how-to-create-a-trie-in-python
- based on dictionary - to allow a key to have different items, the leaf must be a key:value pair

In [59]:
LEAF = None



def make_trie(words):
    trie = {}
    for word in words:
        t = trie
        for letter in word:
            if letter not in t:
                t[letter] = {}
            t = t[letter]
        t[LEAF] = None # value can be anything
    return trie

def in_trie(word, trie):
    t = trie
    for letter in word:
        if letter not in t:
            return False
        else:
            t = t[letter]
    return True if LEAF in t else False
    

words = ['foo', 'bar', 'baz', 'barz']
make_trie(words) == {
    'b': {
        'a': {
                'r': {None: None, 'z': {None: None}}, 
                'z': {None: None}
             }
    },
    'f': {'o': {'o': {None: None}}}
}

True

In [60]:

trie = make_trie(words)
assert in_trie("foo", trie) == True
assert in_trie("fooa", trie) == False
assert in_trie("fo", trie) == False
assert in_trie("bar", trie) == True

# Graphs
- two types of data structure
    - adjancency list
    - adjancency matrix