Chapter 4. Sort

In [None]:
# Quick Sort 
# Whole first, then parts (先整体后局部): partition around a pivot P, then sort each side
# ...   ...
# <= P  >= P
#  QS    QS

def quick_sort(li):
    """Sort ``li`` in place with quick sort.

    The list is modified directly, so nothing useful is returned. An empty
    (or otherwise falsy) ``li`` is left untouched.
    """
    if li:
        # Hand the full inclusive index range to the recursive worker.
        return qsort(li, 0, len(li) - 1)
    return None

# [ 3,  0,  4,  2,  1,  7,  5,  8 ]
#             pivot_val
# left                         right
def qsort(li, start, end):
    """Sort the inclusive range ``li[start..end]`` in place, ascending.

    The value at the middle index is used as the pivot. A two-pointer pass
    swaps values so that everything left of the crossing point is <= pivot
    and everything right of it is >= pivot; values equal to the pivot may
    end up on either side. Both sides are then sorted recursively.
    """
    # A range of zero or one element is already sorted.
    if end <= start:
        return

    # Keep the pivot as a value, not an index: the element at the middle
    # position may be swapped elsewhere while partitioning.
    pivot_value = li[(start + end) // 2]

    lo, hi = start, end
    # The loop runs while lo <= hi (not lo < hi) so the pointers fully cross
    # and the two recursive ranges never share an index, e.g. for [1, 2].
    while lo <= hi:
        # Strict comparisons (< and >, not <= and >=): both pointers stop on
        # values equal to the pivot, e.g. for [1, 1, 1].
        while lo <= hi and li[lo] < pivot_value:
            lo += 1
        while lo <= hi and li[hi] > pivot_value:
            hi -= 1
        if lo > hi:
            break
        li[lo], li[hi] = li[hi], li[lo]
        lo += 1
        hi -= 1

    # The pointers have crossed (hi < lo); recursively sort each side.
    qsort(li, start, hi)
    qsort(li, lo, end)

# Demo: print a sample list, sort it in place with quick_sort, print it again.
li = [3, 0, 4, 2, 1, 7, 5, 8 ]
print(li)

quick_sort(li)
print(li)

In [3]:
# Merge Sort 
# Parts first, then whole (先局部后整体): sort each half, then merge the two halves

def merge_sort(li):
    """Return the elements of ``li`` in sorted order using top-down merge sort.

    Inputs with fewer than two elements are returned as-is (the same
    object). Longer inputs are split at the midpoint, each half is sorted
    recursively, and the two results are combined with ``merge``; the
    input itself is not modified.
    """
    size = len(li)
    if size < 2:
        return li

    half = size // 2
    sorted_left = merge_sort(li[:half])
    sorted_right = merge_sort(li[half:])
    return merge(sorted_left, sorted_right)

def merge(li_left, li_right):
    """Merge two ascending-sorted lists into a new ascending-sorted list.

    Neither input is modified. The merge is stable: when two elements
    compare equal, the one from ``li_left`` is placed first.
    """
    l, r = 0, 0
    li_res = []
    while l < len(li_left) and r < len(li_right):
        # "<=" (not "<") so ties are taken from the left list first, which
        # is what keeps the surrounding merge sort stable.
        if li_left[l] <= li_right[r]:
            li_res.append(li_left[l])
            l += 1
        else:
            li_res.append(li_right[r])
            r += 1
    # At most one of the two lists still has elements; append the remainder.
    li_res.extend(li_left[l:])
    li_res.extend(li_right[r:])
    return li_res

# Demo: merge_sort returns its result, so it is captured in li2 and printed
# after the original list.
li = [3, 0, 4, 2, 1, 7, 5, 8 ]
print(li)

li2 = merge_sort(li)
print(li2)


[3, 0, 4, 2, 1, 7, 5, 8]
[0, 1, 2, 3, 4, 5, 7, 8]


In [None]:
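# Why does this version return nothing (void)? Because it sorts the list in place.
# Why allocate the auxiliary list once, before the recursion starts?
# Answer: so every recursive call shares one scratch list instead of allocating its own temporary list!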
from lib_test import metatest, trace
def merge_sort_main(li):
    """Sort ``li`` in place with merge sort, using one shared scratch list.

    Lists with at most one element are left untouched. Otherwise a
    zero-filled buffer of the same length is allocated once and passed to
    ``merge_sort2`` together with the full inclusive index range.
    """
    n = len(li)
    if n <= 1:
        return
    scratch = [0] * n
    merge_sort2(li, 0, n - 1, scratch)

@trace
def merge_sort2(li, start, end, li_temp):
    """Merge-sort the inclusive range ``li[start..end]`` in place.

    ``li_temp`` is the scratch list handed on to ``merge2``. The range is
    split at its midpoint, each half is sorted recursively, and the halves
    are then merged by ``merge2``.
    """
    # Zero or one element: nothing to do.
    if end <= start:
        return

    mid = (start + end) // 2
    merge_sort2(li, start, mid, li_temp)
    # The right half starts at mid + 1 because mid belongs to the left half.
    merge_sort2(li, mid + 1, end, li_temp)
    merge2(li, start, end, li_temp)
 
def merge2(li, start, end, li_temp):
    """Merge the two sorted halves of ``li[start..end]`` in place.

    The range (inclusive on both ends) is split at
    ``middle = (start + end) // 2``; ``li[start..middle]`` and
    ``li[middle+1..end]`` must each already be sorted ascending. The merged
    result is built in ``li_temp[start..end]`` and then copied back into
    ``li``. The merge is stable: on ties the element from the left half is
    taken first.

    ``li_temp`` must be indexable over ``start..end``; its contents in that
    range are overwritten.
    """
    middle = (start + end) // 2
    leftIndex = start
    rightIndex = middle + 1
    index = leftIndex
    while leftIndex <= middle and rightIndex <= end:
        # "<=" (not "<") so equal elements keep their original relative
        # order, which makes the whole merge sort stable.
        if li[leftIndex] <= li[rightIndex]:
            li_temp[index] = li[leftIndex]
            leftIndex += 1
        else:
            li_temp[index] = li[rightIndex]
            rightIndex += 1
        index += 1
    # Drain whichever half still has elements ("<=" because the bounds are
    # inclusive).
    while leftIndex <= middle:
        li_temp[index] = li[leftIndex]
        index += 1
        leftIndex += 1
    while rightIndex <= end:
        li_temp[index] = li[rightIndex]
        index += 1
        rightIndex += 1
    # Copy the merged range back; end + 1 because slices exclude the stop.
    li[start:end + 1] = li_temp[start:end + 1]

# Demo entry point: print a sample list, sort it in place with
# merge_sort_main, and print it again.
if __name__ == '__main__':
    li = [3, 0, 4, 2, 1, 7, 5, 8]
    print(li)

    merge_sort_main(li)
    print(li)

Quick Sort Vs Merge Sort
先整体后局部 Vs 先局部后整体
Quick sort orders the whole range around a pivot first, then sorts each part; merge sort sorts the parts first, then merges them into the whole.

            Time Complexity
Quick Sort  Worst Scenario - O(n^2), ex. 1, 2, 3, 4, 5... when picking the 1st elem as pivot; O(nlogn) on average
Merge Sort  Always O(nlogn)

            Space Complexity
Quick Sort  O(1) extra memory (in place), plus the recursion stack: O(logn) on average
Merge Sort  O(n)

            Stability
Quick Sort  Unstable
Merge Sort  Stable

Time Complexity Calculation
T(n) = 2 * T(n/2) + O(n) = O(nlogn)

In [None]:
# Quick Selection
# Kth Largest Element

#  1 2 3 4 | 5 6 7 8
#  n/2
#  partition in O(n), then check whether k falls in the first part or the second part
#  T(n) = O(n) + T(n/2)
#       = O(n) + O(n/2) + T(n/4)
#       = O(n) on average

def kthLargestElement(li, k):
    """Return the result of ``quick_select`` for rank ``k`` over all of ``li``.

    ``k`` is 1-based. The selection partitions ``li`` in place, so the
    order of its elements may change.

    Returns -1 when ``li`` is empty or when ``k`` is not in
    ``1..len(li)``, since no element has that rank.
    """
    # Reject ranks that cannot exist instead of recursing with a bad k.
    if not li or not 1 <= k <= len(li):
        return -1
    start, end = 0, len(li) - 1
    return quick_select(li, start, end, k)

# [ 3,  0,  4,  2,  1,  7,  5,  8 ]
#             pivot_val
# left                         right
def quick_select(li, start, end, k):
    """Return the k-th largest value in the inclusive range ``li[start..end]``.

    ``k`` is 1-based and relative to ``start`` (k == 1 is the largest value
    in the range); it is expected to satisfy ``1 <= k <= end - start + 1``.
    The range is partitioned in place in descending order, so the order of
    the elements of ``li`` may change.

    Returns None when the range is empty (``start > end``).
    """
    if start > end:
        # Empty range: there is nothing to select.
        return None
    if start == end:
        # A single candidate is left; it is the answer.
        return li[start]

    # Keep the pivot as a value, not an index: the element at the middle
    # position may be swapped elsewhere while partitioning.
    pivot = li[(start + end) // 2]

    left, right = start, end
    # Descending partition: values larger than the pivot move to the front,
    # smaller ones to the back, so position start + k - 1 holds the k-th
    # largest. "<=" in the loop and strict comparisons below let values
    # equal to the pivot land on either side (e.g. [1, 2] and [1, 1, 1]).
    while left <= right:
        while left <= right and li[left] > pivot:
            left += 1
        while left <= right and li[right] < pivot:
            right -= 1
        if left <= right:
            li[left], li[right] = li[right], li[left]
            left += 1
            right -= 1

    # After the loop right < left:
    #   li[start..right] >= pivot, li[left..end] <= pivot, and any index
    #   strictly between right and left holds a value equal to the pivot.
    target = start + k - 1
    if target <= right:
        return quick_select(li, start, right, k)
    if target >= left:
        # Skip the (left - start) larger elements that precede this part.
        return quick_select(li, left, end, k - (left - start))

    # target sits in the gap between the two parts: it is the pivot value.
    return li[right + 1]

# Demo: print a sample list, sort it in place with quick_sort, print it again.
# NOTE(review): this cell is about quick select but calls quick_sort, not
# kthLargestElement -- confirm whether that is intended.
li = [3, 0, 4, 2, 1, 7, 5, 8 ]
print(li)

quick_sort(li)
print(li)

- 快速选择算法的 Partition 的实质：
快速选择/快速排序中的 partition 是 可左可右 的partition，也就是说，对于nums[i] == pivot 时，这个数字既可以放在左边，也可以放在右边。

- 为什么这样划分数组呢？
原因是为了避免出现类似 [1,1,1,1,1,1] 的数组中的元素，全部被分到一边的情况。我们让 nums[i] == pivot 的情况既不属于左边也不属于右边，这样就能够让 partition 之后的结果稍微平衡一些。
如果 quick select / quick sort 写成了nums[i] < pivot 在左侧，nums[i] >= pivot 在右侧这种形式，就会导致划分不平均，从而导致错误或者超时。

- 为什么问题《partition array》不能使用同样的代码？
对于问题《partition array》来说，题目的要求是将数组划分为两个部分，一部分满足一个条件，另外一部分不满足这个条件，所以可以严格的把 nums[i] < pivot 放在左侧，把 nums[i] >= pivot 放在右侧，这样子做完一次 partition 之后，就能够将这两部分分开。

- 总结
简单的说就是，quick select 和 quick sort 的 partition 目标不是将数组 严格的按照 nums[i] < pivot 和nums[i] >= pivot 去拆分开，而是只要能够让左半部分 <= 右半部分即可。这样子 nums[i] == pivot 放在哪儿都无所谓，两边都可以放。