# EC2202 Sorting

**Disclaimer.**
These code examples are based on
1. [MIT 6.006 (Professor Erik Demaine, Dr. Jason Ku, and Professor Justin Solomon)](https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-006-introduction-to-algorithms-spring-2020/index.htm)
2. [KAIST CS206 (Professor Otfried Cheong)](https://otfried.org/courses/cs206/)
3. [LeetCode](https://leetcode.com/)
4. [GeeksForGeeks](https://practice.geeksforgeeks.org/)
5. Coding Interviews

In [None]:
import doctest
import random
import time

## 'ppp' Exercises

### Q3

[Adobe] There is an integer array `nums` sorted in ascending order (with **distinct** values).

Prior to being passed to your function, `nums` is **possibly rotated** at an unknown pivot index `k` `(1 <= k < nums.length)` such that the resulting array is `[nums[k], nums[k+1], ..., nums[n-1], nums[0], nums[1], ..., nums[k-1]]` (0-indexed). For example, `[0,1,2,4,5,6,7]` might be rotated at pivot index `3` and become `[4,5,6,7,0,1,2]`.

Given the array `nums` after the possible rotation and an integer `target`, return the index of `target` if it is in `nums`, or -1 if it is not in `nums`.

You must write an algorithm with `O(log n)` runtime complexity.

In [None]:
def search(nums, target):
  '''
  Return the index of target in nums, or -1 if it is not present.

  nums is an ascending array of distinct integers that may have been
  rotated. The search is a binary search over the half-open window
  [lo, hi): at every step at least one side of the midpoint is in sorted
  order, and a range test on that side decides which half to keep.

  >>> search([4, 5, 6, 7, 0, 1, 2], 0)
  4
  >>> search([4, 5, 6, 7, 0, 1, 2], 3)
  -1
  >>> search([1], 0)
  -1
  '''
  lo, hi = 0, len(nums)
  while lo < hi:
    mid = lo + (hi - lo) // 2
    probe = nums[mid]
    if probe == target:
      return mid

    if nums[lo] <= probe:
      # nums[lo..mid] is sorted, so the rotation point lies to the right:
      #   [3, 4, 5, 6, 7, 8, 0, 1, 2]   lo -> 3, mid -> 7, rotation point -> 0
      if nums[lo] <= target < probe:
        hi = mid
      else:
        lo = mid + 1
    elif probe < target <= nums[hi - 1]:
      # nums[mid..hi-1] is sorted, so the rotation point lies to the left:
      #   [7, 8, 0, 1, 2, 3, 4, 5, 6]   lo -> 7, mid -> 2, rotation point -> 0
      # and target falls inside that sorted right part.
      lo = mid + 1
    else:
      hi = mid
  return -1

  # # solution #2
  # # step 1. binary search for finding the pivot
  # left, right = 0, len(nums) - 1
  # while left < right:
  #   mid = left + (right - left) // 2
  #   if nums[mid] > nums[right]:
  #     left = mid + 1
  #   else:
  #     right = mid
  # pivot = left

  # # step 2. binary search for finding the target
  # left, right = 0, len(nums) - 1
  # while left <= right:
  #   mid = left + (right - left) // 2
  #   real_mid = (mid + pivot) % len(nums)
  #   if nums[real_mid] < target:
  #     left = mid + 1
  #   elif nums[real_mid] > target:
  #     right = mid - 1
  #   else:
  #     return real_mid
  # return -1

In [None]:
# Run the doctest examples embedded in search's docstring; the third
# argument (False) turns verbose reporting off, so only failures are printed.
doctest.run_docstring_examples(search, globals(), False, __name__)

## Sorting

### Motivation

Algorithms often use sorting as a key subroutine. For example, consider the problem of checking whether a list contains duplicate values: the first of the following two algorithms takes O(n^2) time, while the second one assumes the list is already sorted and then needs only a single linear-time scan (sorting first and then scanning takes O(n log n) in total).

In [None]:
def has_duplicates(a):
  '''
  Return True if two different positions of a hold equal values.

  Brute force: every pair (i, j) with i < j is compared, so the running
  time is quadratic in len(a).
  '''
  size = len(a)
  return any(a[i] == a[j] for i in range(size) for j in range(i + 1, size))

In [None]:
# This function assumes that a is sorted!
def has_duplicates_sorted(a):
  '''
  Return True if two neighbouring items of a are equal.

  Only adjacent items are compared, so every duplicate is found only when
  a is sorted (equal values then sit next to each other). One linear pass.
  '''
  return any(a[i - 1] == a[i] for i in range(1, len(a)))

Let's test our implementation

In [None]:
# Build the integers 1..19999, add a second copy of 8888 so that exactly one
# value is duplicated, and shuffle the list.
w = list(range(1, 20000))
w.append(8888)
random.shuffle(w)

# Time one call of whichever has_duplicates is defined at this point.
t1 = time.time()
print(has_duplicates(w))
t2 = time.time()
print("time spent:", t2-t1)

True
time spent: 9.977123260498047


In [None]:
def has_duplicates(a):
  '''
  Return True if a contains a repeated value.

  Works on a sorted copy of a (the argument itself is not modified) and
  lets has_duplicates_sorted compare neighbouring items of that copy.
  '''
  ordered = sorted(a)
  return has_duplicates_sorted(ordered)

In [None]:
# Build the integers 1..99999, add a second copy of 8888 so that exactly one
# value is duplicated, and shuffle the list.
w = list(range(1, 100000))
w.append(8888)
random.shuffle(w)

# Time one call of whichever has_duplicates is defined at this point.
t1 = time.time()
print(has_duplicates(w))
t2 = time.time()
print("time spent:", t2-t1)

True
time spent: 0.025548696517944336


### Selection Sort

We find the smallest element, recursively sort the rest of the list, and concatenate the two pieces:

In [None]:
# find index of minimum in a
# O(N)
def find_min_index(a, start=0):
  '''
  Return the index of the smallest item in a[start:].

  start defaults to 0, so a one-argument call scans the whole list; a
  caller that sorts in place can pass the first unsorted position instead.
  On ties the earliest index wins. If a[start:] is empty, start itself is
  returned. The scan is linear in len(a) - start.
  '''
  mindex = start
  for k in range(start + 1, len(a)):
    if a[k] < a[mindex]:
      mindex = k
  return mindex

# Cost: T(N) = T(N-1) + O(N), where T(N-1) is the time needed to sort the
# remaining N-1 items, hence T(N) = O(N^2).
def selection_sort(a):
  '''
  Return the items of a in ascending order using recursive selection sort.

  The smallest item is picked out with find_min_index, the remaining items
  are sorted recursively, and the two pieces are concatenated. A list with
  fewer than two items is returned as is; otherwise a new list is built.
  '''
  if len(a) < 2:
    return a
  smallest_at = find_min_index(a)
  rest = a[:smallest_at] + a[smallest_at + 1:]
  return [a[smallest_at]] + selection_sort(rest)

Let's test our implementation

In [None]:
# 100 random integers drawn from 0..999; print them, then print the result
# returned by selection_sort.
w = [ random.randrange(1000) for i in range(100) ]
print(w)
ws = selection_sort(w)
print(ws)

In-place + recursive implementation

In [None]:
# sort a[i:]
def selection_sort(a, i):
  '''
  Sort a[i:] in place with recursive selection sort; returns None.

  The smallest item of a[i:] is swapped into position i, then the rest
  (a[i+1:]) is sorted by the recursive call. Items before index i are not
  touched. There is one recursive call per position, so very long lists
  can exceed Python's recursion limit.
  '''
  if len(a) - i <= 1:
    return
  # Locate the first minimum of a[i:] directly so the search starts at i;
  # min() returns the earliest index when several items are equally small.
  k = min(range(i, len(a)), key=lambda idx: a[idx])
  # exchange a[i] and a[k]
  a[i], a[k] = a[k], a[i]
  # sort the rest
  selection_sort(a, i + 1)

In-place + iterative implementation

In [None]:
def selection_sort(a):
  '''
  Sort a in place with iterative selection sort; returns None.

  For each position i the smallest item of a[i:] is swapped into place.
  "In place" means that no new list is created: only items of a are
  exchanged.
  '''
  n = len(a)
  for i in range(n - 1):
    # Index of the first minimum in a[i:]; the search must start at i
    # because a[:i] is already in its final order.
    k = min(range(i, n), key=lambda idx: a[idx])
    # exchange a[i] and a[k]
    a[i], a[k] = a[k], a[i]

### Insertion Sort

Insertion sort uses recursion the other way round: we recursively sort n − 1 elements, and finally insert the remaining element into the sorted list. It is based on the observation that it is easy to insert a new element into a sorted list. Here is a version that keeps the original data intact:

In [None]:
def sorted_linear_search(a, x):
  '''
  Return the position at which x should be inserted into the sorted list a.

  That position is the index of the first item that is >= x, or len(a)
  when every item is smaller than x. Found by a left-to-right scan.
  '''
  for pos, item in enumerate(a):
    if item >= x:
      return pos
  return len(a)

def insertion_sort(a):
  '''
  Return the items of a in ascending order using recursive insertion sort.

  Everything except the last item is sorted recursively (on a sliced copy),
  and the last item is then inserted at the position reported by
  sorted_linear_search. A list with fewer than two items is returned as is.
  '''
  if len(a) < 2:
    return a
  last = a[-1]
  # Sort everything except the last item ...
  prefix = insertion_sort(a[:-1])
  # ... then put the last item into its correct place.
  prefix.insert(sorted_linear_search(prefix, last), last)
  return prefix

In [None]:
# 100 random integers drawn from 0..999; print them, then print the result
# returned by insertion_sort.
w = [ random.randrange(1000) for i in range(100) ]
print(w)
ws = insertion_sort(w)
print(ws)

**'ppp' exercise** In-place implementation

In [None]:
# sort a[:j]
def insertion_sort(a, j):
  '''
  Sort the prefix a[:j] in place with recursive insertion sort; returns None.

  a[:j-1] is sorted by the recursive call, then a[j-1] is moved left past
  every larger item until it reaches its place. Items from index j onwards
  are not touched.
  '''
  if j < 2:
    return
  insertion_sort(a, j - 1)
  hole = j - 1         # slot currently free for the pending value
  pending = a[hole]    # the item still to be inserted
  while hole >= 1 and a[hole - 1] > pending:
    a[hole] = a[hole - 1]   # shift the larger neighbour one step right
    hole -= 1
  a[hole] = pending

In-place + iterative implementation

In [None]:
def insertion_sort(a):
  '''
  Sort a in place with iterative insertion sort; returns None.

  Before each round a[:idx] is already sorted; a[idx] is then moved left
  past every larger item until it reaches its place.
  '''
  for idx in range(1, len(a)):
    pending = a[idx]   # the item still to be inserted
    hole = idx         # slot currently free for the pending value
    while hole >= 1 and a[hole - 1] > pending:
      a[hole] = a[hole - 1]   # shift the larger neighbour one step right
      hole -= 1
    a[hole] = pending

### Bubble Sort

It’s called “bubble sort” because large elements “rise” to the end of the array like bubbles in a carbonated drink. What makes it so simple is the fact that it only uses exchanges of adjacent elements:

**'ppp' exercise**

In [None]:
def bubble_sort(a):
  '''
  Sort a in place with bubble sort; returns None.

  Each round exchanges adjacent out-of-order items in a[:unsorted], which
  carries the largest of them to a[unsorted-1]; the unsorted part then
  shrinks by one item.
  '''
  unsorted = len(a)
  while unsorted > 1:
    for j in range(unsorted - 1):
      if a[j] > a[j + 1]:
        a[j], a[j + 1] = a[j + 1], a[j]
    unsorted -= 1

One observation about bubble sort is that we can stop once a bubble phase has made no more change—then we know that the array is already in sorted order.

In [None]:
def bubble_sort(a):
  '''
  Sort a in place with bubble sort, stopping early; returns None.

  Each round exchanges adjacent out-of-order items in a[:end], carrying the
  largest of them to a[end-1]. A round that exchanges nothing means the
  list is already fully sorted, so the function returns immediately.
  '''
  end = len(a)
  while end > 1:
    swapped = False
    for j in range(end - 1):
      if a[j] > a[j + 1]:
        a[j], a[j + 1] = a[j + 1], a[j]
        swapped = True
    if not swapped:
      # nothing moved in this round: everything is already sorted
      return
    end -= 1

Does this improve the time complexity of the algorithm? In the best case, when the input data is already sorted, the running time improves from O(n^2) to O(n). The case of sorted or nearly-sorted input is important, so this is an important improvement.

Unfortunately, in the worst case early termination does not help. The reason is that in every bubble round, the smallest element in the list can only move one position down. So if we start with any list where the smallest element is in the last position, it must take n − 1 bubble rounds to finish. And therefore bubble sort with early termination still takes quadratic time in the worst case.

### Merge Sort

All the sorting algorithms we have seen so far have a time complexity of ***O(n^2)***.

O(n): 하나씩 거쳐갈때
O(n^2): 하나씩 거쳐가면서 다른 하나를 거칠 때

We split the list into two halves, sort each sublist recursively, and then merge the two sorted lists.

In [None]:
def merge(a, b):
  '''
  Merge two sorted lists into one new sorted list.

  Both a and b must already be sorted. The smaller front item is taken
  repeatedly; when the two front items are equal, the one from a is taken
  first. Neither input is modified.
  '''
  merged = []
  ia = ib = 0
  na, nb = len(a), len(b)
  while ia < na and ib < nb:
    if a[ia] <= b[ib]:
      merged.append(a[ia])
      ia += 1
    else:
      merged.append(b[ib])
      ib += 1
  # One input is used up; append whatever is left of the other
  # (at most one of these two slices is non-empty).
  merged += a[ia:]
  merged += b[ib:]
  return merged

def merge_sort(a):
  '''
  Return the items of a in ascending order using merge sort.

  The list is cut in the middle, each half is sorted by a recursive call,
  and the two sorted halves are combined with merge. A list with fewer
  than two items is returned as is.
  '''
  if len(a) < 2:
    return a
  half = len(a) // 2
  left = merge_sort(a[:half])    # recursion sorts each half ...
  right = merge_sort(a[half:])
  return merge(left, right)      # ... and merge combines them

In [None]:
# Time merge_sort on n random integers drawn from 0..999999.
n = 1000000
w = [ random.randrange(1000000) for i in range(n) ]
# Show the first 100 values before sorting.
print(w[:100])
startTime = time.time()
w = merge_sort(w)
stopTime = time.time()
# Show the first 100 values of the result.
print(w[:100])

print("Runtime %g secs" % (stopTime - startTime))

### Quick Sort

In Merge-Sort, the divide step is trivial, and the combine step is where all the work is done. In Quick-Sort it is the other way round: the combine step is trivial, and all the work is done in the divide step:


In [None]:
def quick_sort(a):
  '''
  Return the items of a in ascending order using quick sort.

  The middle item serves as pivot. Every item goes into one of three
  buckets (smaller than, equal to, or anything else relative to the pivot);
  the outer two buckets are sorted recursively and the three are
  concatenated. A list with fewer than two items is returned as is.
  '''
  if len(a) < 2:
    return a
  pivot = a[len(a) // 2]
  below, same, above = [], [], []
  for item in a:
    if item < pivot:
      bucket = below
    elif item == pivot:
      bucket = same
    else:
      bucket = above
    bucket.append(item)
  return quick_sort(below) + same + quick_sort(above)

One advantage of Quick-Sort compared to Merge-Sort is that it can be implemented as an in-place algorithm, needing no extra space except the array storing the elements:

In [None]:
# partition range a[lo:hi+1] and return index of pivot
def partition(a, lo, hi):
  '''
  Partition a[lo:hi+1] in place around its middle item and return the
  pivot's final index.

  The pivot (the item at index (lo + hi) // 2) is first parked at a[hi].
  Two cursors then move towards each other, exchanging items that are on
  the wrong side. Finally the pivot is put where it belongs: afterwards no
  item before the returned index is larger than the pivot and no item
  after it is smaller.
  '''
  mid = (lo + hi) // 2
  pivot = a[mid]
  # Park the pivot (the middle item) in the last slot: a[mid] <-> a[hi].
  a[mid], a[hi] = a[hi], a[mid]

  left, right = lo - 1, hi
  while left < right:
    left += 1
    # The pivot parked at a[hi] stops this scan at the latest at hi.
    while a[left] < pivot:
      left += 1
    right -= 1
    while a[right] > pivot and right > lo:
      right -= 1
    if left < right:
      a[left], a[right] = a[right], a[left]
  # Put the pivot where it belongs; the displaced item moves to a[hi].
  a[hi], a[left] = a[left], pivot
  return left

# sort range a[lo:hi+1]
def quick_sort(a, lo, hi):
  '''
  Sort the range a[lo:hi+1] in place with quick sort; returns None.

  partition puts one pivot at its final index; the ranges on either side
  of that index are then sorted recursively. A range with fewer than two
  items (lo >= hi) is left alone.
  '''
  if lo >= hi:
    return
  split = partition(a, lo, hi)
  quick_sort(a, lo, split - 1)
  quick_sort(a, split + 1, hi)