#  大纲

2022/05/16-05/19

各种常用的排序算法、实现方法、时间和空间复杂度和典型例题


- [X] 有哪些排序算法，实现一下各种排序算法
    - [X] [BubbleSort](https://www.runoob.com/w3cnote/bubble-sort.html)
    - [X] [Selection Sort](https://www.runoob.com/w3cnote/selection-sort.html)
    - [X] insertion-sort
    - [X] MergeSort
        - [X] 迭代法
        - [X] 递归法
    - [X] HeapSort
    - [X] QuickSort：分治，pivot
    - [X] BucketSort
    - [X] radix-sort
    
- [X] 排序算法之间的比较，见md笔记
- [X] 时间空间复杂度，见md笔记

Google requirements
- Be familiar with common sorting functions and on what kind of input data they’re efficient on or not. 
- Think about efficiency means in terms of runtime and space used. 
- For example, in exceptional cases insertion-sort or radix-sort are much better than the generic QuickSort/MergeSort/HeapSort answers.

参考链接：
- [leetcode cookbook](https://books.halfrost.com/leetcode/ChapterTwo/Stack/)
- [宫水三叶-排序wiki](https://github.com/SharingSource/LogicStack-LeetCode/wiki/%E6%8E%92%E5%BA%8F)
- [dowalle/algo](https://github.com/dowalle/algo)

# 各种排序算法的实现

In [4]:
# Bubble Sort
def BubbleSort(nums):
    # by default, sort by ascending order
    for i in range(1, len(nums)):
        swap = False
        for j in range(len(nums) - i):
            if nums[j+1] < nums[j]:
                nums[j+1], nums[j] = nums[j], nums[j+1]
                swap = True
        if not swap: # for early stop, to reduce the time complexity for the best case.
            break

In [5]:
nums = [1,4,6,2,4,2,1]
BubbleSort(nums)
print(nums)

[1, 1, 2, 2, 4, 4, 6]


In [8]:
# Selection Sort
def SelectionSort(nums):
    # by default, sort by ascending order
    for i in range(len(nums)):
        min_id = -1
        for j in range(i, len(nums)):
            if min_id < 0 or nums[j] < nums[min_id]:
                min_id = j
        nums[i], nums[min_id] = nums[min_id], nums[i]

In [9]:
nums = [1,4,6,2,4,2,1]
SelectionSort(nums)
print(nums)

[1, 1, 2, 2, 4, 4, 6]


In [40]:
# Insertion Sort
def InsertionSort(nums):
    # by default, sort by ascending order
    for i in range(1, len(nums)):
        tmp = nums[i]
        j = i - 1
        while j >= 0 and tmp < nums[j]:
            nums[j+1] = nums[j]
            j -= 1
        nums[j + 1] = tmp

In [43]:
nums = [3,6,2,4,2,1]
InsertionSort(nums)
print(nums)

[1, 2, 2, 3, 4, 6]


In [49]:
# Merge Sort 递归实现，需要额外的O(n)空间
def MergeSort(nums):
    if len(nums) > 1:
        middle = len(nums) // 2
        return merge(MergeSort(nums[:middle]), MergeSort(nums[middle:]))
    return nums

def merge(nums1, nums2):
    new_nums = []
    while nums1 and nums2:
        if nums1[0] <= nums2[0]:
            new_nums.append(nums1.pop(0))
            
        else:
            new_nums.append(nums2.pop(0))
    if nums1:
        new_nums.extend(nums1)
    else:
        new_nums.extend(nums2)
    return new_nums
    

In [50]:
nums = [3,6,2,4,2,1]
nums = MergeSort(nums)
print(nums)

[1, 2, 2, 3, 4, 6]


In [7]:
# Merge Sort 迭代实现
def MergeSort(nums):
    sort_len = 1
    to_merge = [[item] for item in nums]
    while len(to_merge) > 1:
        to_merge_new = []
        while len(to_merge) >= 2:
            to_merge_new.append(merge(to_merge.pop(0), to_merge.pop(0)))
        if to_merge:
            to_merge_new.append(to_merge.pop())
        to_merge = to_merge_new
    return to_merge[0]
        
def merge(nums1, nums2):
    new_nums = []
    while nums1 and nums2:
        if nums1[0] <= nums2[0]:
            new_nums.append(nums1.pop(0))
            
        else:
            new_nums.append(nums2.pop(0))
    if nums1:
        new_nums.extend(nums1)
    else:
        new_nums.extend(nums2)
    return new_nums
        

In [9]:
nums = [3,6,2,4,2,5,7,1]
nums = MergeSort(nums)
print(nums)

[1, 2, 2, 3, 4, 5, 6, 7]


In [10]:
import heapq
def HeapSort(nums):
    heap = []
    for num in nums:
        heapq.heappush(heap, num)
    nums = []
    while heap:
        nums.append(heapq.heappop(heap))
    return nums
        

In [11]:
nums = [3,6,2,4,2,5,7,1]
nums = HeapSort(nums)
print(nums)

[1, 2, 2, 3, 4, 5, 6, 7]


In [34]:
def QuickSort(nums):
    pivot = 0
    for i in range(1, len(nums)):
        if nums[i] < nums[0]:
            pivot += 1
            nums[pivot], nums[i] = nums[i], nums[pivot]
    print(nums)
    if pivot > 0:
        nums[0], nums[pivot] = nums[pivot], nums[0]
        nums[:pivot] = QuickSort(nums[:pivot])
    if pivot < len(nums)-1:
        nums[pivot + 1: ] = QuickSort(nums[pivot + 1: ])
    return nums

In [35]:
nums = [3,6,2,4,2,5,7,1]
nums = QuickSort(nums)
print(nums)

[3, 2, 2, 1, 6, 5, 7, 4]
[1, 2, 2]
[2, 2]
[2]
[6, 5, 4, 7]
[4, 5]
[5]
[7]
[1, 2, 2, 3, 4, 5, 6, 7]


In [25]:
nums = [1,2,3,4,5]
nums = QuickSort(nums)
print(nums) # O(n^2)

[1, 2, 3, 4, 5]
[2, 3, 4, 5]
[3, 4, 5]
[4, 5]
[5]
[1, 2, 3, 4, 5]


In [26]:
nums = [5,4,3,2,1]
nums = QuickSort(nums)
print(nums) # O(n^2)

[4, 3, 2, 1, 5]
[3, 2, 1, 4]
[2, 1, 3]
[1, 2]
[1]
[1, 2, 3, 4, 5]


In [36]:
# 改进版：加一个提前停止，把顺序的时间复杂度降到了O(n)
def QuickSort2(nums):
    pivot = 0
    changed = False
    for i in range(1, len(nums)):
        if nums[i] < nums[0]:
            changed = True
            pivot += 1
            nums[pivot], nums[i] = nums[i], nums[pivot]
    print(nums)
    if not changed:
        return nums
    if pivot > 0:
        nums[0], nums[pivot] = nums[pivot], nums[0]
        nums[:pivot] = QuickSort(nums[:pivot])
    if pivot < len(nums)-1:
        nums[pivot + 1: ] = QuickSort(nums[pivot + 1: ])
    return nums

In [37]:
nums = [1,2,3,4,5]
nums = QuickSort2(nums)
print(nums) # O(n)

[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]


In [38]:
nums = [5,4,3,2,1]
nums = QuickSort2(nums)
print(nums) # O(n^2)

[5, 4, 3, 2, 1]
[1, 4, 3, 2]
[4, 3, 2]
[2, 3]
[3]
[1, 2, 3, 4, 5]


In [39]:
# Bucket Sort
from collections import Counter
def BucketSort(nums):
    if not nums:
        return nums
    min_val, max_val = min(nums), max(nums)    
    buckets = Counter(nums)
    res = []
    for i in range(min_val, max_val + 1):
        if i in buckets:
            res.extend([i] * buckets[i])
    return res
    

In [47]:
nums = [3,6,12,4,25,5,47,1]
nums = BucketSort(nums)
print(nums)

[1, 3, 4, 5, 6, 12, 25, 47]


In [45]:
def RadixSort(nums):
    k = len(str(max(nums)))
    for j in range(k):
        buckets = [[] for i in range(10)]
        for num in nums:
            buckets[num//(10**j)%10].append(num)
        nums = []
        for b in buckets:
            nums.extend(b)
        print(j, nums)
    return nums

In [46]:
nums = [3,6,12,4,25,5,47,1]
nums = RadixSort(nums)
print(nums)

0 [1, 12, 3, 4, 25, 5, 6, 47]
1 [1, 3, 4, 5, 6, 12, 25, 47]
[1, 3, 4, 5, 6, 12, 25, 47]


# leetcode例题
 
- Quicksort
     - [(medium)75颜色分类](https://leetcode.cn/problems/sort-colors/)。
         - 思路一：快排。快排需要注意的是，为了减少交换次数，实际上待交换的数是和pivot元素的后一位做交换。结束之后，还要做一次pivot和最后一个被交换的元素的交换。
         - 思路二：桶排序，因为知道了最大最小值
     - 🌟[(medium)215数组中的第K个最大元素](https://leetcode.cn/problems/kth-largest-element-in-an-array/)
        - 思路一：堆。见堆的笔记。时间复杂度O(nlogk)，空间复杂度O(k)
        - 思路二：快排寻找pivot。这里不需要完成排序，只需要判断pivot的下标和目标下标的相对位置，再走左边或者右边即可，是个O(nlogn)的算法。随机确定pivot，可以让期望的复杂度变为O(n).参考[题解](https://leetcode.cn/problems/kth-largest-element-in-an-array/solution/shu-zu-zhong-de-di-kge-zui-da-yuan-su-by-leetcode-/)。空间复杂度是O(logn)，栈调用。
 
- MergeSort
    - [(medium)148排序链表](https://leetcode.cn/problems/sort-list/)
         - 思路一：选择排序：链表：不是很容易确定下标，进行分段，所以最直接的想法是选择排序，时间复杂度是n方的，超时。
         - 思路二：合并排序：虽然链表无法直接根据下标，进行分段。但是不是有办法可以把链表分为两段吗？就是快慢指针！这样就可以使用merge sort了。时间复杂度是nlogn。
         - 思路三：桶排序。存储每个节点的值，和最大最小值。最后更改每个节点的值。时间复杂度是n+k。
 
- InsertionSort
     - [(medium)147对链表进行插入排序](https://leetcode.cn/problems/insertion-sort-list/)。
         - 思路一：朴素的插入：每次从头开始检查，直到已排好序的结尾。选择合适的位置插入当前元素。
         - 思路二：改进版。不必每次都从头开始：如果当前元素比结尾的元素大，则直接进入到下一个元素的查找。

- Bucket/RadixSort
    - [(hard)164最大间距](https://leetcode.cn/problems/maximum-gap/)。思路：桶排序。通过一次遍历，可以获取数组中的最大值，最小值，结合数组的个数，很容易知道最大间距的最小可能值，即为diff=max_val - min_val / (len(nums) - 1)，向上取整。可以设计桶，每个桶内的数相差最多diff。这样我们知道最大值，如果要比diff大，则只能出现在相邻的两个桶之间。那么实际上，每个桶只需要保存两个值即可：当前桶内的最大值和当前桶内的最小值，用下一个桶的最小值，减去当前桶的最大值，就是落在这两个桶里的相邻两个数之间的间距。
    - [(medium)148排序链表](https://leetcode.cn/problems/sort-list/)
         - 思路一：选择排序：链表：不是很容易确定下标，进行分段，所以最直接的想法是选择排序，时间复杂度是n方的，超时。
         - 思路二：合并排序：虽然链表无法直接根据下标，进行分段。但是不是有办法可以把链表分为两段吗？就是快慢指针！这样就可以使用merge sort了。时间复杂度是nlogn。
         - 思路三：桶排序。存储每个节点的值，和最大最小值。最后更改每个节点的值。时间复杂度是n+k。
    - 🌟[(medium)220存在重复元素III](https://leetcode.cn/problems/contains-duplicate-iii/)
        - 思路一：两层循环，外层对nums中的每一个元素，内层对当前元素的后k个元素进行循环，判断是否满足条件。时间复杂度O(nk)，空间复杂度O(1). 超时
        - 思路二：桶排序🌟。非常巧妙！时间复杂度O(n), 空间复杂度O(k).是如何想到的呢？对于当前元素，我们只和它之前的最多k个元素相比较，只需要快速判断他们的差值是否小于等于t即可。如何快速判断？假设能把小于等于t map到一个桶中，一查询就知道了。但显然桶的分割方法是固定的，在两个桶边缘的数字，差值只为1，所以还需要但也只需要检测相邻两个桶。注意对窗口外的元素进行出桶。       
    - 🌟[(medium)324摆动排序II](https://leetcode.cn/problems/wiggle-sort-ii/)。思路：桶排序。这道题并不需要完全的排序，题目中给出了数的大小范围，考虑使用桶排序。有个需要注意的细节：按桶从大到小，先填偶数位（大数），再填奇数位。这么做的原因是，如果从小到大填写，会出现 [1, 2, 2, 3]这种情况，但是如果从大到小填写，则是[2, 3, 1, 2]。这么做可以把相同的数岔开。因为是从大到小填，所以要先填大数，再填小数。
    - [(medium)451根据字符出现频率排序](https://leetcode.cn/problems/sort-characters-by-frequency/)
        - 思路一：直接按频率排序。时间复杂度是O(n + klogk)的
        - 思路二：桶排序。这又是一个知道上下届的题目，可以使用桶将复杂度降为线性的O(n + k)
    - 🌟[(medium)767重构字符串](https://leetcode.cn/problems/reorganize-string/)。
        - 思路一：桶排序+最大堆：优先填剩余次数最多的字母。因为要相邻两个不重复，所以每次填两个字母，出堆剩余次数最多的两个字母，填完之后再将剩余次数入堆。如何保证这次填的第二个字母和下次填的第一个字母不一样呢？假设一样会出现矛盾。假设这次填的是a和b，剩余次数分别为ia和ib，因为a在b的前面我们知道ia>=ib，填完之后依然会有ia-1>=ib-1。下一次出堆的时候如果有b，那必然a还在b的前面，只能出现abab的情况，而非abbx。这个解法的时间复杂度是O(n+nlogk)，因为还要进行n次出堆和入堆。空间复杂度是O(k)
        - 🌟思路二：间隔着填入字母。先填偶数位，再填奇数位。对于出现次数最多的字母，最多可能出现i+1次（如果len=2i+1）且这i+1次是所有下标为偶数的位置。如果一个字母出现的次数大于(len+1)//2次，则一定会发生重复；否则，一定可行（需要数学证明）。在可行的情况下，只要间隔着填，就不会出现相邻两个重复的情况。参考[题解](https://leetcode.cn/problems/reorganize-string/solution/by-dodo_1202-kccv/)。这个方法的时间复杂度是O(n+klogk)，第一次遍历获取次数是O(n)的，进行k次出堆是O(klogk)的。空间复杂度是O(k)
    - [(medium)1054距离相等的条形码](https://leetcode.cn/problems/distant-barcodes/)。思路：同767

- 重定义排序函数：
    - 🌟[(easy)937重新排列日志文件](https://leetcode.cn/problems/reorder-data-in-log-files/)。本质上是按照要求重新定义一种排序方式。然后可以使用python内置的sort函数
    - [(medium)179最大数](https://leetcode.cn/problems/largest-number/)。
        - 思路一：试图使用桶排序，然后逆序9-0遍历，发现长度不相等的case十分棘手。譬如 [342, 34234], [432, 43243]。这两个case提取出前缀之后，分别是342 ['', '34'], 432 ['', '43']，那么空白字符串对应的应该是放在前面还是后面呢？第一个应该放在后面，即34342，第二个应该放在前面，即43243432. 这个判断过程是怎么进行的呢？一般而言， 空字符可以当作前缀第一个数字来对待，但是对于以前缀第一个数字为key的子桶来说，他们之间如何比较大小？只能是通过比较 前缀+前缀+桶 和 前缀+桶+前缀，来比较大小了。总结，需要特殊对待空字符，和以前缀第一个数字为key的子桶。 另外一个细节：最后要清洗输出，把前缀多余的0去掉，只保留1个。
        特别：这里可以参考[宫水三叶的题解中](https://leetcode.cn/problems/largest-number/solution/gong-shui-san-xie-noxiang-xin-ke-xue-xi-vn86e/)：寻找越界i的问题。对于空白字符串的处理：实际上是循环使用前缀，来和其他数匹配。
        - 思路二：重写排序规则。重新进行排序，如果a+b的字典序比b+a大，则a应该排在b的前面，返回-1.其他情况同理。


- [(medium)969煎饼排序](https://leetcode.cn/problems/pancake-sorting/)。思路：这道题目定义了一种排序的方式，让我们来使用这种方式。首先观察这种排序，最重要的特性是把头元素和尾元素交换位置。可以联想插入排序：向已经排好序的数组中插入新的元素。这里是依次把尾部排好，就不用再动了。在排前面的。细节：可以增加一个early stop，用于已经顺序排好的数组。
- [Google interview1](https://www.jianshu.com/p/f657337918a0)。思路：原地交换，直到不能再交换。找到了重复元素，或者进入下一位的交换。思路同leetcode 41

In [None]:
# 75 快排
class Solution:
    def quicksort(self, nums, start_id, end_id):
        if start_id < end_id - 1:
            index = start_id
            for i in range(start_id + 1, end_id):
                num = nums[i]
                if num < nums[start_id]:
                    index += 1 # 细节：先增加index，再交换。否则，当退出循环的时候，index会比预期的多1
                    nums[index], nums[i] = num, nums[index]
            if index > start_id:
                tmp = nums[start_id]
                nums[start_id] = nums[index]
                nums[index] = tmp
            self.quicksort(nums, start_id, index)
            self.quicksort(nums, index + 1, end_id)


    def sortColors(self, nums: List[int]) -> None:
        """
        Do not return anything, modify nums in-place instead.
        """
        self.quicksort(nums, 0, len(nums))

In [None]:
# 75桶排序
class Solution:
    def sortColors(self, nums: List[int]) -> None:
        """
        Do not return anything, modify nums in-place instead.
        """
        buckets = {}
        for num in nums:
            buckets[num] = buckets.get(num, 0) + 1
        occupied = 0
        for key in [0, 1, 2]:
            if key not in buckets:
                continue
            for i in range(occupied, occupied + buckets[key]):
                nums[i] = key
            occupied += buckets[key]

In [None]:
# 148 选择排序，超时
class Solution:
    def sortList(self, head: Optional[ListNode]) -> Optional[ListNode]:
        out_cur = head
        if not out_cur:
            return head
        while out_cur.next:
            cur = out_cur.next
            while cur:
                if cur.val < out_cur.val:
                    tmp = out_cur.val
                    out_cur.val = cur.val
                    cur.val = tmp
                cur = cur.next
            out_cur = out_cur.next
        return head

In [1]:
# 148 合并排序，通过
# Definition for singly-linked list.
# class ListNode:
#     def __init__(self, val=0, next=None):
#         self.val = val
#         self.next = next
class Solution:
    def sortList(self, head: Optional[ListNode]) -> Optional[ListNode]:
        if not head or not head.next: # 细节一：如果是None或者只有一个节点，直接返回
            return head
        faster = head
        slower_head = ListNode(next=head) # 细节二：这个实际是指向慢指针的节点。假设有2个节点，第一次循环，快指针走到头，慢指针走到第二个，指向慢指针的走到第一个。
        ordered = True # 细节三：为了early stop

        while faster and faster.next:
            if slower_head.next.next.val < slower_head.next.val:
                ordered = False
            faster = faster.next.next
            slower_head = slower_head.next
        
        mid = slower_head.next # 后半段的开始节点
        slower_head.next = None
        if not ordered:
            head = self.sortList(head) # 对应细节三：如果有一半已经排好序了，减少一次排序
        next_head = self.sortList(mid)
        dummy_head = ListNode() # 细节四：dummy 头指针，方便统一循环格式
        cur_head = dummy_head
        while head and next_head:
            if head.val <= next_head.val:
                cur_head.next = head
                head = head.next
            else:
                cur_head.next = next_head
                next_head = next_head.next
            cur_head = cur_head.next
        if head:
            cur_head.next = head
        else:
            cur_head.next = next_head
        return dummy_head.next

NameError: name 'Optional' is not defined

In [None]:
# 148 桶排序，通过
class Solution:
    def sortList(self, head: Optional[ListNode]) -> Optional[ListNode]:
        if not head:
            return head
        bucket = {}
        min_val = head.val
        max_val = head.val
        cur = head
        while cur:
            bucket[cur.val] = bucket.get(cur.val, 0) + 1
            min_val = min(min_val, cur.val)
            max_val = max(max_val, cur.val)
            cur = cur.next
        cur = head
        for i in range(min_val, max_val + 1):
            if i in bucket:
                for _ in range(bucket[i]):
                    cur.val = i
                    cur = cur.next
        return head

In [None]:
# 147 插入排序
# Definition for singly-linked list.
# class ListNode:
#     def __init__(self, val=0, next=None):
#         self.val = val
#         self.next = next
class Solution:
    def insertionSortList(self, head: ListNode) -> ListNode:
        dummy_head = ListNode(next=head)
        if not head or not head.next:
            return head
        second_head = head.next
        first_tail = head
        while second_head:
            # find the position of former_node
            former_node = dummy_head # 每次循环从头开始
            while former_node.next != second_head and former_node.next.val <= second_head.val:
                former_node = former_node.next
            # insert the second_head here
            if second_head != former_node.next:
                tmp_node = second_head.next
                second_head.next = former_node.next
                former_node.next = second_head
                first_tail.next = tmp_node
                # start the next iteration
                second_head = tmp_node
            else:
                first_tail = second_head
                second_head = second_head.next
        return dummy_head.next

In [None]:
# 147 改进版
# Definition for singly-linked list.
# class ListNode:
#     def __init__(self, val=0, next=None):
#         self.val = val
#         self.next = next
class Solution:
    def insertionSortList(self, head: ListNode) -> ListNode:
        dummy_head = ListNode(next=head)
        if not head or not head.next:
            return head
        second_head = head.next
        first_tail = head
        while second_head:
            if first_tail.val <= second_head.val: # 不必每次都从头开始遍历。如果当前元素比上一个结尾大，则直接更新，检查下一个元素
                first_tail = second_head
            else:
                # find the position of former_node
                former_node = dummy_head # 否则的话，再从头开始
                while former_node.next.val <= second_head.val:
                    former_node = former_node.next
                # insert the second_head here
                first_tail.next = second_head.next
                second_head.next = former_node.next
                former_node.next = second_head
            # start the next iteration
            second_head = first_tail.next
        return dummy_head.next



In [None]:
# 937
class Solution:
    def reorderLogFiles(self, logs: List[str]) -> List[str]:
        def trans(log):
            a, b = log.split(" ", 1)
            return (0, b, a) if b[0].isalpha() else (1,)
        logs.sort(key=trans)
        return logs

In [None]:
# 179 思路一：桶排序

class Solution:
    def largestNumber(self, nums: List[int]) -> str:
        def bucketsort(nums, default_val=""):
            if len(nums) == 1:
                return default_val + nums[0]
            buckets = {}
            empty_cnt = 0
            for num in nums:
                if num == "":
                    empty_cnt += 1
                    continue
                start_digit = num[0]
                if start_digit not in buckets:
                    buckets[start_digit] = [num[1:]]
                else:
                    buckets[start_digit].append(num[1:])
            merge_strs = {}
            for key, val in buckets.items():
                merge_strs[key] = bucketsort(val, default_val + key)
            merged_str = ""
            for i in range(9, -1, -1):
                if str(i) not in merge_strs:
                    continue
                if default_val != "" and str(i) == default_val[0]: # 特别处理和前缀首字符相等的子桶
                    if default_val + merge_strs[str(i)] > merge_strs[str(i)] + default_val: 
                        merged_str += ("".join([default_val for _ in range(empty_cnt)]))
                        merged_str += merge_strs[str(i)]
                        empty_cnt = 0
                    else:
                        merged_str += merge_strs[str(i)]
                        merged_str += ("".join([default_val for _ in range(empty_cnt)]))
                        empty_cnt = 0
                else:
                    if (default_val == "" or i < int(default_val[0])) and empty_cnt > 0:
                        merged_str += ("".join([default_val for _ in range(empty_cnt)]))
                        empty_cnt = 0
                    merged_str += merge_strs[str(i)]
            if empty_cnt > 0: # 特别处理‘’字符
                merged_str += ("".join([default_val for _ in range(empty_cnt)]))
            return merged_str
        uncleand = bucketsort([str(num) for num in nums], default_val="")
        for i in range(len(uncleand)): # 清洗最终输出结果
            if uncleand[i] != '0':
                break
        return uncleand[i:]

In [None]:
# 179 思路二：重写排序规则
import functools
class Solution:
    def largestNumber(self, nums: List[int]) -> str:
        def cmp(a, b):
            if a + b == b + a:
                return 0
            if a + b > b + a:
                return -1
            return 1
        str_nums = [str(item) for item in nums]
        str_nums.sort(key=functools.cmp_to_key(cmp))
        res = ''.join(str_nums)
        for i in range(len(res)):
            if res[i] != '0':
                break
        return res[i:]

In [2]:
# 215 思路：快排寻找pivot，不需要完全排序
import random
class Solution:
    def fastsort(self, nums, start_id, end_id, aim_id):
        if end_id == start_id:
            return
        pivot_id = random.randint(start_id, end_id) # 随机化确定pivot
        if pivot_id != start_id:
            nums[start_id], nums[pivot_id] = nums[pivot_id], nums[start_id] # 把pivot放到第一位，便于循环
        swap_id = start_id
        for i in range(start_id +1, end_id + 1):
            if nums[i] <= nums[start_id]:
                swap_id += 1
                nums[swap_id], nums[i] = nums[i], nums[swap_id]
        nums[start_id], nums[swap_id] = nums[swap_id], nums[start_id]
        # 判断相对位置
        if swap_id == aim_id:
            return
        if swap_id < aim_id:
            self.fastsort(nums, swap_id + 1, end_id, aim_id)
        else:
            self.fastsort(nums, start_id, swap_id - 1, aim_id)

    def findKthLargest(self, nums: List[int], k: int) -> int:
        k_smaller_id = len(nums) - k
        self.fastsort(nums, 0, len(nums) - 1, k_smaller_id)
        return nums[k_smaller_id]

1

In [None]:
# 220 思路一：两层循环
class Solution:
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
        for i, numi in enumerate(nums):
            for j in range(i+1, min(i +k + 1, len(nums))):
                if abs(nums[j] - numi) <= t:
                    return True
        return False

In [None]:
# 220 思路二：桶排序
class Solution:
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int, t: int) -> bool:
        buckets = {}
        for i, numi in enumerate(nums):
            bucket_id = numi // (t + 1) # 桶的分割方法，每t+1个放到一个桶里，这样能保证桶内的差值满足条件。
            if bucket_id in buckets:
                return True
            buckets[bucket_id] = numi
            if bucket_id - 1 in buckets: # 需要但也只需要检测相邻两个桶
                if numi - buckets[bucket_id - 1] <= t:
                    return True
            if bucket_id + 1 in buckets:
                if buckets[bucket_id + 1] - numi <= t:
                    return True
            if i > k: # 对窗口外的元素出桶
                buckets.pop(nums[i-k] // (t+1))
        return False

In [None]:
# 324 桶排序

class Solution:
    def wiggleSort(self, nums: List[int]) -> None:
        """
        Do not return anything, modify nums in-place instead.
        """
        buckets = {}
        for num in nums:
            buckets[num] = buckets.get(num, 0) + 1
        ttl_len = len(nums)
        if ttl_len % 2 == 1:
            small_end = ttl_len
            big_end = ttl_len - 1
        else:
            big_end = ttl_len
            small_end = ttl_len - 1
        start_num = 5000
        for i in range(1, big_end, 2):
            while start_num not in buckets or buckets[start_num] == 0:
                start_num -= 1
            nums[i] = start_num
            buckets[start_num] -= 1
        for i in range(0, small_end, 2):
            while start_num not in buckets or buckets[start_num] == 0:
                start_num -= 1
            nums[i] = start_num
            buckets[start_num] -= 1
        return nums

In [None]:
# 451  思路一：按出现频率排序
from collections import Counter
class Solution:
    def frequencySort(self, s: str) -> str:
        alpha_cnt = Counter([c for c in s])
        to_sort = [(val, key) for key, val in alpha_cnt.items()]
        to_sort.sort(reverse=True)
        res = ""
        for item in to_sort:
            res += item[1] * item[0]
        return res

In [None]:
# 451  思路二：找到最大频率，用桶排序
from collections import Counter
class Solution:
    def frequencySort(self, s: str) -> str:
        alpha_cnt = Counter([c for c in s])
        buckets = {}
        max_buckets = 0
        for key, val in alpha_cnt.items():
            if val in buckets:
                buckets[val].append(key)
            else:
                buckets[val] = [key]
            max_buckets = max(max_buckets, val)
        res = ""
        cur_cnt = max_buckets
        while cur_cnt > 0:
            if cur_cnt in buckets:
                for alpha in buckets[cur_cnt]:
                    res += alpha * cur_cnt
            cur_cnt -= 1
        return res


In [None]:
# 767 桶排序 + 最大堆，每次填两个。
from collections import Counter
import heapq
class Solution:
    def reorganizeString(self, s: str) -> str:
        alpha_cnt = Counter([c for c in s])
        max_heap = []
        for alpha, cnt in alpha_cnt.items():
            heapq.heappush(max_heap, (-cnt, alpha))
        res = ""
        while len(max_heap) > 1:
            cur_cnt, cur_alpha = heapq.heappop(max_heap) # 每次取当前剩余次数最多的两个字母，并排填上。
            next_cnt, next_alpha = heapq.heappop(max_heap)
            res += (cur_alpha + next_alpha)
            cur_cnt += 1
            next_cnt += 1
            if cur_cnt < 0:
                heapq.heappush(max_heap, (cur_cnt, cur_alpha)) # 将剩余次数入堆
            if next_cnt < 0:
                heapq.heappush(max_heap, (next_cnt, next_alpha))
        if max_heap:
            final_cnt, final_alpha = heapq.heappop(max_heap)
            if final_cnt < -1:
                return ''
            res += final_alpha
        return res

In [None]:
# 767 思路二：先填偶数位再填奇数位

from collections import Counter
import heapq
class Solution:
    def reorganizeString(self, s: str) -> str:
        res = [c for c in s]
        alpha_cnt = Counter(res)
        max_heap = []
        for alpha, cnt in alpha_cnt.items():
            heapq.heappush(max_heap, (-cnt, alpha))
        
        start_id = 0 # 先填偶数位
        max_cnt, _ = max_heap[0]
        if -max_cnt > (len(res) + 1) // 2: # 如果最长的元素数量多于(len(res) + 1) // 2，则无法实现；否则可以。
            return ""
        while max_heap:
            cur_cnt, cur_alpha = heapq.heappop(max_heap)
            while cur_cnt < 0 and start_id < len(s):
                res[start_id] = cur_alpha
                start_id += 2
                cur_cnt += 1
                if start_id >= len(s):
                    start_id = 1 # 再填奇数位
        return "".join(res)

In [1]:
# 1054  和767一样
import heapq
from collections import Counter
class Solution:
    def rearrangeBarcodes(self, barcodes: List[int]) -> List[int]:
        barcodes_cnt = Counter(barcodes)
        res = [0 for bar in barcodes]
        start_id = 0
        max_heap = []
        for barcode, cnt in barcodes_cnt.items():
            heapq.heappush(max_heap, (-cnt, barcode))
        while max_heap:
            cur_cnt, cur_barcode = heapq.heappop(max_heap)
            while cur_cnt < 0:
                res[start_id] = cur_barcode
                cur_cnt += 1
                start_id += 2
                if start_id >= len(barcodes):
                    start_id = 1
        return res

NameError: name 'List' is not defined

In [None]:
# 969 从尾到头，依次排序。把最大的放在最后面。
class Solution:
    def flip(self, arr, k):
        for i in range((k+1)//2):
            arr[i], arr[k-i-1] = arr[k-i-1], arr[i]
        
    def pancakeSort(self, arr: List[int]) -> List[int]:
        end = len(arr) # end_id + 1,从尾到头,把最大的放在最后面。
        res = []
        while end > 1:
            end -= 1
            max_id = 0
            max_val = arr[0]
            change_times = 0 # 记录最大值变更了多少次，用于对已经是顺序的数组，进行early stop
            for i in range(end + 1):
                if arr[i] > max_val:
                    max_val = arr[i]
                    max_id = i
                    change_times += 1
            if change_times == end + 1: # 对已经是顺序的数组，进行early stop
                break
            if max_id == end:
                continue
            self.flip(arr, max_id + 1)
            self.flip(arr, end + 1)
            res.extend([max_id + 1, end + 1])
        return res

In [3]:
# Google interview
# nums with the length n, all the numbers are in [0, n-1]
def SwapInPlace(nums):
    for i in range(len(nums)):
        while nums[i] != i:
            next_id = nums[i]
            if nums[next_id] == next_id:
                return next_id
            nums[i], nums[next_id] = nums[next_id], next_id
    return -1

nums = [2,4,1,5,7,6,1,9,0,2]
print(SwapInPlace(nums))         

2


In [None]:
# 164 桶排序
class Solution:
    def maximumGap(self, nums: List[int]) -> int:
        min_val = min(nums)
        max_val = max(nums)
        if len(nums) == 1 or max_val == min_val:
            return 0
        bucket_dist = int((max_val - min_val) / (len(nums) - 1)) # 最大间距的最小可能值，其实需要向上取整
        if bucket_dist == 0:
            bucket_dist = 1
        buckets = {}
        max_gap = 0
        for num in nums:
            bucket_id = num // bucket_dist
            if bucket_id in buckets:
                # 每个桶只需要保存两个值即可：当前桶内的最大值和当前桶内的最小值
                buckets[bucket_id] = [min(buckets[bucket_id][0], num), max(buckets[bucket_id][1], num)]
            else:
                buckets[bucket_id] = [num, num]
        last_max = -1
        for bucket_id in range(min_val // bucket_dist, max_val // bucket_dist + 1):
            if bucket_id in buckets:
                if last_max > -1:
                    # 用下一个桶的最小值，减去当前桶的最大值，就是落在这两个桶里的相邻两个数之间的间距
                    max_gap = max(max_gap, buckets[bucket_id][0] - last_max)
                last_max = buckets[bucket_id][1]
        return max_gap

# 我的总结

python中sort的使用方式，有两种`sorted()`, `list.sort()`
- `sorted(list, func)`：返回值是排序后的list，使用func可以自定义；方式是，传入两个待比较的元素 x, y，
    - 如果 x 应该排在 y 的前面，返回 -1，
    - 如果 x 应该排在 y 的后面，返回 1。
    - 如果 x 和 y 相等，返回 0。（应该是为了保持稳定性）
    
-  `list.sort(cmp=none, key=none, reverse=none)` (python2), `list.sort(key=none, reverse=none)`(python3). 其中,`cmp`是可自定义的比较函数。在python3中，可以通过以下方法自定义比较函数

- 排序算法在实现的时候，有很多细节需要注意。
    - 快排：先增加index，再交换。否则，当退出循环的时候，index会比预期的多1
    
- 重新实现排序规则！是一个很好用的解题方法。参考例题937，179

- 桶排序可以在某种程度上把排序从nlogn的复杂度降低到线性的。
    - 有时候，tricky的地方在于如何设计桶。参考例题164，220
    
- 对于摆动排序这样的问题，突破点常常在按着什么样的下标规律插入数据。参考例题324， 767

In [4]:
import functools

my_list = [('a', 5), ('bs', 3), ('cs', 4)]
def cmp(x, y):
    if x[1] < y[1]:
        return -1
    if x[1] > y[1]:
        return 1
    return 0

my_list.sort(key=functools.cmp_to_key(cmp))
print(my_list)

[('bs', 3), ('cs', 4), ('a', 5)]


# 现实中的应用