In [None]:
import timeit
from random import randint
from IPython.display import HTML, display

# utils for sorting 

"""
generate an int array 
n: array size
min ≦ arr[i] ≦ max 
"""
def genRandomArr(n, min, max):
    """
    Build a list of n random integers.

    n: number of elements to generate
    min, max: inclusive bounds, i.e. min <= arr[i] <= max
    returns: a new list of length n
    """
    values = []
    for _ in range(n):
        values.append(randint(min, max))
    return values

"""
generate a nearly ordered int array 
n: array size
"""
def genNearlyOrderedArr(n, swaps):
    """
    Generate a nearly ordered int array.

    Starts from the sorted sequence 0, 1, ..., n-1 and then swaps two randomly
    chosen positions, `swaps` times.

    n: array size
    swaps: number of random swaps applied to the sorted sequence
    returns: a new list holding a permutation of range(n)
    """
    arr = list(range(n))  # 0, 1, 2, ..., n-1
    if not arr:
        # nothing to swap; randint(0, n-1) would raise ValueError on an empty range
        return arr
    for _ in range(swaps):
        pos1 = randint(0, n - 1)
        pos2 = randint(0, n - 1)
        arr[pos1], arr[pos2] = arr[pos2], arr[pos1]  # randomly swap two elements
    return arr

"""
return True if array is sorted 
"""
def is_sorted(arr):
    """
    Return True if arr is in non-decreasing order.

    An empty or single-element arr counts as sorted.
    """
    # sorted means: no element is greater than its right-hand neighbour
    return not any(arr[k] > arr[k + 1] for k in range(len(arr) - 1))

"""
invoke the sorting function and assert the result is correct 
func: sorting function 
arr: arr to be sorted 
"""
def sorting_test(func, arr):
    """
    Run a sorting function on a copy of arr and assert the copy ends up sorted.

    func: sorting function; it is called with a list and its return value is
          ignored, so it has to sort that list in place
    arr: input data; it is copied first so the same input can be reused for
         several sorting functions
    raises: AssertionError if the copy is not sorted after func ran
    """
    work = list(arr)
    func(work)
    label = "<result> %s:" % func.__name__.rjust(20)
    print(label, work[:10])  # only the first 10 elements are shown
    assert is_sorted(work), " !!! SORTING ERROR !!!"

In [None]:
# Selection sort
# Sorts arr from left to right.
# Invariant at position i: arr[0, i) is sorted, arr[i, n) is unsorted.
# Each step finds the smallest element of arr[i, n) and places it at arr[i].
# Time complexity: O(n^2)

print("Selection")
# show the animated illustration of the algorithm inline
html = """<img src='https://mth252.fastzhong.com/notebooks/sort_selection.gif' style='width: 70%'>"""
display(HTML(html))

def sort_selection(arr):
    """
    Selection sort, in place.

    For every position from left to right, locate the smallest element of the
    still-unsorted tail and swap it into that position.

    arr: list to sort (modified in place)
    returns: the same list object, sorted ascending
    """
    size = len(arr)
    for head in range(size - 1):
        # index of the (first) smallest element in arr[head, size)
        smallest = min(range(head, size), key=arr.__getitem__)
        if smallest != head:
            arr[head], arr[smallest] = arr[smallest], arr[head]
    return arr

# Demo on a small sample. sort_selection sorts arr_sample in place, so the
# unsorted sample is printed before the call.
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("        sample: ", arr_sample)
print("sort_selection: ", sort_selection(arr_sample))

In [None]:
# Insertion sort
# Sorts arr from left to right.
# Invariant at position i: arr[0, i) is sorted, arr[i, n) is unsorted.
# Each step inserts arr[i] at its proper position inside the sorted left part.
# Time complexity: O(n^2)

print("Insertion")
# show the animated illustration of the algorithm inline
html = """<img src='https://mth252.fastzhong.com/notebooks/sort_insertion.gif' style='width: 70%'>"""
display(HTML(html))

def sort_insertion(arr):
    """
    Insertion sort over the whole list, using the shifting variant.

    arr: list to sort (modified in place)
    returns: whatever sort_insertion_shift returns for the full index range
    """
    # the swapping variant is a drop-in alternative:
    # sort_insertion_swap(arr, 0, len(arr) - 1)
    last = len(arr) - 1
    return sort_insertion_shift(arr, 0, last)

def sort_insertion_swap(arr, l, r):
    """
    Insertion sort of the inclusive slice arr[l..r], swapping neighbours.

    arr: list to sort (modified in place); elements outside [l, r] are untouched
    l, r: first and last index of the range to sort
    returns: the same list object
    """
    for pos in range(l, r + 1):
        k = pos
        # walk arr[pos] to the left while it is smaller than its left neighbour
        while k > l and arr[k] < arr[k - 1]:
            arr[k - 1], arr[k] = arr[k], arr[k - 1]
            k -= 1
    return arr
        
def sort_insertion_shift(arr, l, r):
    """
    Insertion sort of the inclusive slice arr[l..r], shifting instead of swapping.

    The element being inserted is held aside while larger elements move one
    slot to the right; it is written once, at its final slot.

    arr: list to sort (modified in place); elements outside [l, r] are untouched
    l, r: first and last index of the range to sort
    returns: the same list object
    """
    for pos in range(l, r + 1):
        pending = arr[pos]  # value to insert; its slot is free to be overwritten
        slot = pos
        while slot > l and pending < arr[slot - 1]:
            arr[slot] = arr[slot - 1]  # shift the larger neighbour right
            slot -= 1
        arr[slot] = pending
    return arr

# Demo on a small sample. sort_insertion sorts arr_sample in place, so the
# unsorted sample is printed before the call.
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("        sample: ", arr_sample)
print("sort_insertion: ", sort_insertion(arr_sample))

In [None]:
# Bubble sort
# Sorts arr from right to left.
# Invariant for position n-i: arr[0, n-i] is unsorted, arr(n-i, n) is sorted.
# Each pass bubbles the biggest element of the unsorted part up to arr[n-i].
# Time complexity: O(n^2)

print("Bubble")
# show the animated illustration of the algorithm inline
html = """<img src='https://mth252.fastzhong.com/notebooks/sort_bubble.gif' style='width: 70%'>"""
display(HTML(html))

def sort_bubble_v1(arr):
    """
    Bubble sort with early exit, in place.

    Pass i compares neighbours inside arr[0, n-i] and leaves the biggest of
    them at arr[n-i]. The sort stops as soon as a pass performs no swap.

    arr: list to sort (modified in place)
    returns: the same list object, sorted ascending
    """
    n = len(arr)
    # up to n-1 passes are needed (e.g. when the smallest element starts at the
    # far right, it moves left by only one slot per pass)
    for i in range(1, n):
        swapped = False
        for j in range(n - i):
            if arr[j] > arr[j + 1]:
                # bubble up the bigger
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        if not swapped:
            # "no swap" means the arr is already sorted
            break
    return arr

def sort_bubble_v2(arr):
    """
    Bubble sort that skips the already-sorted tail, in place.

    After each pass, everything from the position of the last swap onwards is
    in its final place, so the next pass only scans the part before it.

    arr: list to sort (modified in place)
    returns: the same list object, sorted ascending
    """
    n = len(arr)
    i = 1
    # a pass scans pairs (j, j+1) for j in [0, n-i); it is needed while at least
    # one pair remains, i.e. while i < n
    while i < n:
        last_swap = 0
        for j in range(n - i):
            if arr[j] > arr[j + 1]:
                # bubble up the bigger
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                last_swap = j + 1
        # arr[last_swap:] is final, so the next pass must end at index last_swap - 1:
        # n - i == last_swap - 1. With no swap at all this gives i == n + 1 and the
        # loop ends.
        i = n - last_swap + 1
    return arr

# Demo on a small sample. Both functions sort their argument in place, so
# arr_sample is rebuilt before the second call.
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("        sample: ", arr_sample)
print("sort_bubble_v1: ", sort_bubble_v1(arr_sample))
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("sort_bubble_v2: ", sort_bubble_v2(arr_sample))

In [None]:
# Merge sort (top down)
# sort_merge(arr, l, r):
#   m = (l + r) / 2
#   recursively sort arr[l, m]
#   recursively sort arr[m+1, r]
#   merge the two sorted ranges arr[l, m] and arr[m+1, r]

print("Merge")
# show the animated illustration of the algorithm inline
html = """<img src='https://mth252.fastzhong.com/notebooks/sort_merge.gif' style='width: 70%'>"""
display(HTML(html))

# merge two sorted array [l, m] & [m+1, r] 
def merge_v1(arr, l, m, r):
    """
    Merge the two sorted ranges arr[l..m] and arr[m+1..r] in place.

    A fresh copy of arr[l..r] is taken on every call and used as the read
    buffer while the merged result is written back into arr.

    arr: list holding both sorted ranges (modified in place)
    l, m, r: inclusive bounds; m is the last index of the first range
    returns: the same list object
    """
    buf = arr[l:r + 1]       # buf[k] is the original arr[l + k]
    left_last = m - l        # last buffer offset of the 1st range
    right_last = r - l       # last buffer offset of the 2nd range
    a = 0                    # read offset into the 1st range
    b = left_last + 1        # read offset into the 2nd range
    for dst in range(l, r + 1):
        if a > left_last:
            take_left = False            # 1st range exhausted
        elif b > right_last:
            take_left = True             # 2nd range exhausted
        else:
            take_left = buf[a] <= buf[b]  # ties go to the 1st range
        if take_left:
            arr[dst] = buf[a]
            a += 1
        else:
            arr[dst] = buf[b]
            b += 1
    return arr

# merge two sorted array [l, m] & [m+1, r] 
def merge_v2(arr, l, m, r, tmp):
    """
    Merge the two sorted ranges arr[l..m] and arr[m+1..r] in place, reading
    from a caller-supplied scratch list instead of allocating a new one.

    arr: list holding both sorted ranges (modified in place)
    l, m, r: inclusive bounds; m is the last index of the first range
    tmp: scratch list indexed like arr; tmp[l..r] is overwritten
    returns: the same arr object
    """
    # snapshot the range into the scratch list at the same indices
    for k in range(l, r + 1):
        tmp[k] = arr[k]
    left = l         # read position in the 1st range
    right = m + 1    # read position in the 2nd range
    for dst in range(l, r + 1):
        if left > m:
            from_right = True    # 1st range exhausted
        elif right > r:
            from_right = False   # 2nd range exhausted
        else:
            from_right = not (tmp[left] <= tmp[right])  # ties go to the 1st range
        if from_right:
            arr[dst] = tmp[right]
            right += 1
        else:
            arr[dst] = tmp[left]
            left += 1
    return arr

def sort_merge_v1(arr):
    """
    Top-down merge sort over the whole list.

    arr: list to sort (modified in place)
    returns: the result of sort_merge_recusive_v1 on the full index range
    """
    last = len(arr) - 1
    return sort_merge_recusive_v1(arr, 0, last)

def sort_merge_recusive_v1(arr, l, r):
    """
    Recursively merge-sort the inclusive range arr[l..r] in place.

    arr: list being sorted
    l, r: first and last index of the range
    returns: the same list object
    """
    if l < r:
        mid = l + (r - l) // 2  # same as (l + r) // 2
        sort_merge_recusive_v1(arr, l, mid)
        sort_merge_recusive_v1(arr, mid + 1, r)
        # when the halves are already in order across the boundary, skip the merge
        if arr[mid] > arr[mid + 1]:
            merge_v1(arr, l, mid, r)
    return arr

def sort_merge_v2(arr):
    """
    Top-down merge sort over the whole list, with one scratch list that is
    allocated here and handed down to every merge step.

    arr: list to sort (modified in place)
    returns: the result of sort_merge_recusive_v2 on the full index range
    """
    scratch = arr.copy()
    last = len(arr) - 1
    return sort_merge_recusive_v2(arr, 0, last, scratch)

def sort_merge_recusive_v2(arr, l, r, tmp):
    """
    Recursively merge-sort the inclusive range arr[l..r] in place.

    Ranges with r - l <= 10 are handed to sort_insertion_shift instead of
    being split further.

    arr: list being sorted
    l, r: first and last index of the range
    tmp: scratch list passed through to merge_v2
    returns: the same arr object for ranges that are split; otherwise whatever
             sort_insertion_shift returns
    """
    if r - l > 10:
        mid = l + (r - l) // 2  # same as (l + r) // 2
        sort_merge_recusive_v2(arr, l, mid, tmp)
        sort_merge_recusive_v2(arr, mid + 1, r, tmp)
        # when the halves are already in order across the boundary, skip the merge
        if arr[mid] > arr[mid + 1]:
            merge_v2(arr, l, mid, r, tmp)
        return arr
    return sort_insertion_shift(arr, l, r)

# Merge (bottom up)
def sort_merge_bottomup(arr):
    """
    Bottom-up (iterative) merge sort, in place.

    Neighbouring runs of width 1, 2, 4, ... are combined until one run covers
    the whole list. A pair of runs spanning at most 10 elements is combined
    with sort_insertion_shift; longer pairs go through merge_v1.

    arr: list to sort (modified in place)
    returns: the same list object
    """
    size = len(arr)
    width = 1  # length of each run to be merged
    while width < size:
        # lo is the start of the left run; only pairs with a non-empty right run
        # (lo + width < size) are visited
        for lo in range(0, size - width, 2 * width):
            mid = lo + width - 1                       # last index of the left run
            hi = min(lo + 2 * width - 1, size - 1)     # last index of the right run
            if arr[mid] > arr[mid + 1]:
                if hi - lo + 1 > 10:
                    merge_v1(arr, lo, mid, hi)
                else:
                    sort_insertion_shift(arr, lo, hi)
        width *= 2
    return arr

# Demo on a small sample. Each call sorts its argument in place, so
# arr_sample is rebuilt before every further call.
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("             sample: ", arr_sample)
print("      sort_merge_v1: ", sort_merge_v1(arr_sample))
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("      sort_merge_v2: ", sort_merge_v2(arr_sample))
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("sort_merge_bottomup: ", sort_merge_bottomup(arr_sample))

In [19]:
# Quick sort
# partition(arr, l, r):
#   pick a pivot v and rearrange arr[l, r] so that v ends at index p,
#   arr[l, p-1] holds the smaller elements and arr[p+1, r] the bigger ones
# recursively sort arr[l, p-1]
# recursively sort arr[p+1, r]

print("Quick")
# show the animated illustration of the algorithm inline
html = """<img src='https://mth252.fastzhong.com/notebooks/sort_quick.gif' style='width: 70%'>"""
display(HTML(html))

def partition(arr, l, r):
    """
    Partition the inclusive range arr[l..r] around a randomly chosen pivot v.

    arr: list being sorted (modified in place)
    l, r: first and last index of the range
    returns: final index p of the pivot, with arr[l..p-1] < v and
             arr[p+1..r] >= v
    """
    # A random pivot avoids two problems of always taking arr[l]:
    #   - O(n^2) complexity when arr is already sorted
    #   - recursion stack overflow
    pick = randint(l, r)
    arr[l], arr[pick] = arr[pick], arr[l]  # pivot v now sits at arr[l]
    p = l
    # invariant while i scans [l+1, r]:
    #   arr[l+1..p]   <  v
    #   arr[p+1..i-1] >= v
    for i in range(l + 1, r + 1):
        if arr[i] < arr[l]:
            p += 1
            arr[p], arr[i] = arr[i], arr[p]
    # move the pivot between the two parts
    arr[l], arr[p] = arr[p], arr[l]
    return p

def partition_2way(arr, l, r):
    """
    Two-way partition of the inclusive range arr[l..r] around a random pivot v.

    Two cursors close in from both ends; elements equal to v stop both
    cursors and get swapped.

    arr: list being sorted (modified in place)
    l, r: first and last index of the range
    returns: final index of the pivot, with arr[l..p-1] <= v and
             arr[p+1..r] >= v
    """
    pick = randint(l, r)
    arr[l], arr[pick] = arr[pick], arr[l]
    pivot = arr[l]  # arr[l] is not touched again until the final swap
    lo, hi = l + 1, r
    # invariant: arr[l+1..lo-1] <= pivot and arr[hi+1..r] >= pivot
    while True:
        # from the left, stop at the first element that is not smaller than pivot
        while lo <= hi and arr[lo] < pivot:
            lo += 1
        # from the right, stop at the first element that is not bigger than pivot
        while hi >= lo and arr[hi] > pivot:
            hi -= 1
        if lo >= hi:
            break
        arr[lo], arr[hi] = arr[hi], arr[lo]
        lo += 1
        hi -= 1
    arr[l], arr[hi] = arr[hi], arr[l]
    return hi

def partition_3way(arr, l, r):
    """
    Three-way partition of the inclusive range arr[l..r] around a random pivot v.

    arr: list being sorted (modified in place)
    l, r: first and last index of the range
    returns: (lt, gt) such that
             arr[l..lt-1]  <  v
             arr[lt..gt-1] == v
             arr[gt..r]    >  v
    """
    pick = randint(l, r)
    arr[l], arr[pick] = arr[pick], arr[l]
    pivot = arr[l]  # arr[l] is not touched again until the final swap
    # invariant during the scan:
    #   arr[l+1..lt]   <  pivot
    #   arr[lt+1..i-1] == pivot
    #   arr[gt..r]     >  pivot
    lt, i, gt = l, l + 1, r + 1
    while i < gt:
        cur = arr[i]
        if cur < pivot:
            lt += 1
            arr[lt], arr[i] = cur, arr[lt]
            i += 1
        elif cur > pivot:
            gt -= 1
            # the element swapped in from gt is unexamined, so i stays put
            arr[gt], arr[i] = cur, arr[gt]
        else:
            i += 1
    # put the pivot next to the block of equal elements
    arr[l], arr[lt] = arr[lt], arr[l]
    return lt, gt

def sort_quick(arr):
    """
    Quick sort over the whole list, using partition().

    arr: list to sort (modified in place)
    returns: the result of sort_quick_recursive on the full index range
    """
    last = len(arr) - 1
    return sort_quick_recursive(arr, 0, last)

def sort_quick_recursive(arr, l, r):
    """
    Recursively quick-sort the inclusive range arr[l..r] in place.

    arr: list being sorted
    l, r: first and last index of the range
    returns: the same list object
    """
    if l < r:
        pivot_at = partition(arr, l, r)
        # the pivot is at its final index; sort what is left on either side
        sort_quick_recursive(arr, l, pivot_at - 1)
        sort_quick_recursive(arr, pivot_at + 1, r)
    return arr

def sort_quick_2way(arr):
    """
    Quick sort over the whole list, using partition_2way().

    arr: list to sort (modified in place)
    returns: the result of sort_quick_2way_recursive on the full index range
    """
    last = len(arr) - 1
    return sort_quick_2way_recursive(arr, 0, last)

def sort_quick_2way_recursive(arr, l, r):
    """
    Recursively quick-sort the inclusive range arr[l..r] in place with the
    two-way partition.

    Ranges with r - l <= 10 are handed to sort_insertion_shift instead of
    being partitioned.

    arr: list being sorted
    l, r: first and last index of the range
    returns: the same arr object for ranges that are partitioned; otherwise
             whatever sort_insertion_shift returns
    """
    if r - l > 10:
        pivot_at = partition_2way(arr, l, r)
        sort_quick_2way_recursive(arr, l, pivot_at - 1)
        sort_quick_2way_recursive(arr, pivot_at + 1, r)
        return arr
    return sort_insertion_shift(arr, l, r)
    
def sort_quick_3way(arr):
    """
    Quick sort over the whole list, using partition_3way().

    arr: list to sort (modified in place)
    returns: the result of sort_quick_3way_recursive on the full index range
    """
    last = len(arr) - 1
    return sort_quick_3way_recursive(arr, 0, last)

def sort_quick_3way_recursive(arr, l, r):
    """
    Recursively quick-sort the inclusive range arr[l..r] in place with the
    three-way partition.

    Ranges with r - l <= 10 are handed to sort_insertion_shift instead of
    being partitioned.

    arr: list being sorted
    l, r: first and last index of the range
    returns: the same arr object for ranges that are partitioned; otherwise
             whatever sort_insertion_shift returns
    """
    if r - l > 10:
        bounds = partition_3way(arr, l, r)
        lt, gt = bounds
        # arr[l..lt-1] < v, arr[lt..gt-1] == v, arr[gt..r] > v:
        # the middle block is already in place, only the outer two are sorted
        sort_quick_3way_recursive(arr, l, lt - 1)
        sort_quick_3way_recursive(arr, gt, r)
        return arr
    return sort_insertion_shift(arr, l, r)

# Demo on a small sample. Each call sorts its argument in place, so
# arr_sample is rebuilt before every further call.
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("         sample: ", arr_sample)
print("     sort_quick: ", sort_quick(arr_sample))
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("sort_quick_2way: ", sort_quick_2way(arr_sample))
arr_sample = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
print("sort_quick_3way: ", sort_quick_3way(arr_sample))


Quick


         sample:  [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
     sort_quick:  [2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
sort_quick_2way:  [2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
sort_quick_3way:  [2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]


In [20]:
# testing data
n = 10000
test_random = genRandomArr(n, 0, 10 * n)   # n random ints in [0, 10 * n]
test_nearly = genNearlyOrderedArr(n, 5)    # 0..n-1 with 5 random swaps
test_same = [1, 3] + [2] * n               # almost all keys equal; size follows n

# test setup for sort functions
# Functions are listed by name because timeit builds its statements from strings;
# sort_setup imports the data, the checker and every listed function from __main__.
sort_simple = ["sort_selection", "sort_insertion", "sort_bubble_v2"]
sort_efficient = ["sort_merge_v1", "sort_merge_v2", "sort_merge_bottomup", "sort_quick", "sort_quick_2way", "sort_quick_3way"]
sort_funcs = sort_simple + sort_efficient
sort_setup = "from __main__ import test_random, test_nearly, sorting_test, test_same, " + ",".join(sort_funcs)

In [21]:
# test random
# Time every sorting function once on the random data set.
print("       size:", n)
print("test random:", test_random[:10])
# one Timer per function name; the statement runs sorting_test, which copies the
# input, sorts the copy, prints its first elements and asserts it is sorted
sort_timers = [timeit.Timer(stmt=f"sorting_test({f}, test_random)", setup=sort_setup) for f in sort_funcs]
for i,t in enumerate(sort_timers):
    # number=1: each statement is executed exactly once
    print("  <time> %s: %s" % (sort_funcs[i].rjust(20), t.timeit(number=1)))

       size: 10000
test random: [47268, 19126, 27069, 89940, 30598, 26000, 25014, 31765, 22155, 38126]
<result>       sort_selection: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>       sort_selection: 4.213457448000554
<result>       sort_insertion: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>       sort_insertion: 5.793239777999588
<result>       sort_bubble_v2: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>       sort_bubble_v2: 10.427803865999522
<result>        sort_merge_v1: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>        sort_merge_v1: 0.046518678000211366
<result>        sort_merge_v2: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>        sort_merge_v2: 0.045136722000279406
<result>  sort_merge_bottomup: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>  sort_merge_bottomup: 0.050822371000322164
<result>           sort_quick: [1, 21, 26, 46, 67, 72, 84, 93, 110, 119]
  <time>           sort_quick: 0.034749588000522635
<result>      sort_quick_2way:

In [22]:
# test nearly sorted
# Time every sorting function once on the nearly ordered data set.
print("       size:", n)
print("test nearly:", test_nearly[:10])
# one Timer per function name; the statement runs sorting_test, which copies the
# input, sorts the copy, prints its first elements and asserts it is sorted
sort_timers = [timeit.Timer(stmt=f"sorting_test({f}, test_nearly)", setup=sort_setup) for f in sort_funcs]
for i,t in enumerate(sort_timers):
    # number=1: each statement is executed exactly once
    print("  <time> %s: %s" % (sort_funcs[i].rjust(20), t.timeit(number=1)))

       size: 10000
test nearly: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
<result>       sort_selection: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>       sort_selection: 4.056652248999853
<result>       sort_insertion: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>       sort_insertion: 0.01473455199993623
<result>       sort_bubble_v2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>       sort_bubble_v2: 4.428430522000781
<result>        sort_merge_v1: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>        sort_merge_v1: 0.018285478000507283
<result>        sort_merge_v2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>        sort_merge_v2: 0.017436767000617692
<result>  sort_merge_bottomup: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>  sort_merge_bottomup: 0.021213182000792585
<result>           sort_quick: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>           sort_quick: 0.03438185300001351
<result>      sort_quick_2way: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  <time>      sort_quick_2way: 0.020348946000012802
<result>      sort_quick_3way: [0

In [23]:
# test almost same
print("test same:", test_same[:10])
# sort_quick would hit recursion overflow on this input, so it is left out here;
# to see it fail, iterate over sort_funcs instead of sort_funcs_same below.
# A filtered copy is used instead of sort_funcs.remove("sort_quick"): removing from
# the shared list makes this cell raise ValueError when it is run a second time and
# drops sort_quick from the earlier benchmark cells if they are re-run afterwards.
sort_funcs_same = [f for f in sort_funcs if f != "sort_quick"]
sort_timers = [timeit.Timer(stmt=f"sorting_test({f}, test_same)", setup=sort_setup) for f in sort_funcs_same]
for i,t in enumerate(sort_timers):
    # number=1: each statement is executed exactly once
    print("  <time> %s: %s" % (sort_funcs_same[i].rjust(20), t.timeit(number=1)))

test same: [1, 3, 2, 2, 2, 2, 2, 2, 2, 2]
<result>       sort_selection: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>       sort_selection: 3.8225976120002088
<result>       sort_insertion: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>       sort_insertion: 0.005649032999826886
<result>       sort_bubble_v2: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>       sort_bubble_v2: 0.00508300900037284
<result>        sort_merge_v1: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>        sort_merge_v1: 0.011443758000496018
<result>        sort_merge_v2: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>        sort_merge_v2: 0.01054262999969069
<result>  sort_merge_bottomup: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>  sort_merge_bottomup: 0.015260324000337278
<result>      sort_quick_2way: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>      sort_quick_2way: 0.02164434299993445
<result>      sort_quick_3way: [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  <time>      sort_quick_3way: 0.0032126689993674518


## Exercise

In [None]:
# Insertion 
# sort the arr from right to left

In [None]:
# Bubble 
# sort the arr from left to right

In [None]:
# Merge (bottom up)

In [None]:
# Shell

In [None]:
# Dual Pivot Quick Sort