# Chapter 12. Sorting and Selection

The chapter covers sorting and selection algorithms which were not yet covered: Merge-Sort, Quick-Sort and the lower bound for sorting. 

## Important Algorithms and Data Structures

In [None]:
# my implementation of the Merge Sort
# there is more code than in the book version, but for me this code is more straightforward and clear
def merge(A1, A2, A):
    """Merge two sorted sequences into the pre-allocated list A.

    A1 and A2 must each be sorted in non-decreasing order. The first
    len(A1) + len(A2) slots of A are overwritten with the merged result,
    so A must be at least that long. Nothing is returned.

    When A1[i] and A2[j] compare equal, the item from A1 is written first.
    merge_sort passes the left half as A1, so this keeps the sort stable.
    """
    i = j = k = 0
    while i < len(A1) and j < len(A2):
        # '<=' rather than '<': on a tie take the left item first (stability)
        if A1[i] <= A2[j]:
            A[k] = A1[i]
            i += 1
        else:
            A[k] = A2[j]
            j += 1
        k += 1
    # At most one of the two inputs still has items; copy the leftovers.
    while i < len(A1):
        A[k] = A1[i]
        i += 1
        k += 1
    while j < len(A2):
        A[k] = A2[j]
        j += 1
        k += 1


def merge_sort(A):
    """Sort the list A in place using merge sort and return it.

    The list is split into two halves (copies), each half is sorted
    recursively, and the halves are merged back into A.

    The same list object is returned for every input, including lists
    with fewer than two items, which are already sorted.
    """
    if len(A) < 2:
        # Nothing to do, but still return A so the return value is uniform.
        return A
    mid = len(A) // 2
    A1 = A[:mid]
    A2 = A[mid:]
    merge_sort(A1)
    merge_sort(A2)
    merge(A1, A2, A)
    return A

# my implementation of an in-place version of the Quick Sort (the book used LinkedQueue for this)
# input: quick_sort(A, 0, len(A)-1)
def pivot_sort(A, start, end):
    """Partition A[start..end] (inclusive) in place around the pivot A[end].

    After the call, every item of the range that is <= the pivot lies to
    the left of the pivot and every larger item lies to its right.
    Items outside start..end are not touched.

    Returns the final index of the pivot.
    """
    pivot = A[end]
    # Next slot for an item <= pivot. It must begin at `start`, not 0,
    # so that only the requested sub-range is scanned and rearranged.
    leading_index = start
    for i in range(start, end):
        if A[i] <= pivot:
            A[i], A[leading_index] = A[leading_index], A[i]
            leading_index += 1
    # Drop the pivot between the two partitions.
    A[end], A[leading_index] = A[leading_index], A[end]
    return leading_index

def quick_sort(A, start, end):
    """Sort A[start..end] (inclusive) in place with quick sort and return A.

    Call as quick_sort(A, 0, len(A) - 1) to sort the whole list.
    The list is returned for every input, including ranges with fewer
    than two items (end <= start), which need no work.
    """
    if end > start:
        leading_index = pivot_sort(A, start, end)
        quick_sort(A, start, leading_index - 1)
        quick_sort(A, leading_index + 1, end)
    # Returned outside the `if` so trivial ranges do not yield None.
    return A
    

## Reinforcement

### R-12.7

Suppose we are given two ${n}$-element sorted sequences ${A}$ and ${B}$ each with distinct elements, but potentially some elements that are in both sequences. Describe an ${O(n)}$-time method for computing a sequence representing the union ${A∪B}$ (with no duplicates) as a sorted sequence.

In [None]:
# the logic is to merge two lists and then remove duplicates
# for merging I use the merge() function from the merge sort
# for removing duplicates I use one of the functions from C-12.26
# O(n) for merge() + O(n) for remove_dublicates_union() = O(n) time-complexity
def merge(A1, A2, A):
    """Write the sorted merge of A1 and A2 into the leading slots of A.

    Both inputs are expected to be sorted already. When the two current
    items compare equal, the one from A2 is written first. A is modified
    in place and nothing is returned.
    """
    n1, n2 = len(A1), len(A2)
    left = right = out = 0

    # Interleave while both inputs still have unread items.
    while left < n1 and right < n2:
        if A1[left] < A2[right]:
            A[out] = A1[left]
            left += 1
        else:
            A[out] = A2[right]
            right += 1
        out += 1

    # Only one of these loops does any work: flush whichever input remains.
    for pos in range(left, n1):
        A[out] = A1[pos]
        out += 1
    for pos in range(right, n2):
        A[out] = A2[pos]
        out += 1
        
def remove_dublicates_union(A):
    """Return the distinct items of the sorted list A, in order.

    Equal items must be adjacent (which holds for a sorted list). The
    unique items are compacted into the front of A in place, and a slice
    holding just those items is returned. An empty input yields an
    empty result.
    """
    n = len(A)
    if n == 0:
        # A[n-1] below would raise IndexError on an empty list.
        return A[:0]
    j = 0
    for i in range(n - 1):
        # Keep A[i] only when it is the last item of its run of equals.
        if A[i] != A[i + 1]:
            A[j] = A[i]
            j += 1
    # The final item always closes a run, so it is always kept.
    A[j] = A[n - 1]
    return A[:j + 1]

def union(A, B):
    """Return the sorted union of two sorted sequences, without duplicates.

    A and B are merged into a scratch list, and adjacent equal items are
    then collapsed. Both steps are linear in len(A) + len(B).
    """
    # Size the buffer from both inputs: with 2 * len(A), a shorter B left
    # stray zeros in the result and a longer B overflowed the buffer.
    res = [0] * (len(A) + len(B))
    merge(A, B, res)
    if not res:
        # Both inputs are empty, so there is nothing to deduplicate.
        return res
    return remove_dublicates_union(res)
    

## Creativity

### C-12.26

Describe and analyze an efficient method for removing all duplicates from a collection ${A}$ of ${n}$ elements.

In [None]:
# O(nlogn) for sorting + O(n) amortized  for traversal and appending = O(nlogn) time-complexity (amortized)
# O(n) space-complexity for the additional list res
def remove_dublicates(A):
    """Return a new sorted list holding the distinct items of A.

    A is sorted in place as a side effect. An empty input yields an
    empty list.
    """
    A.sort()
    res = []
    n = len(A)
    if n == 0:
        # A[n-1] below would raise IndexError on an empty list.
        return res
    for i in range(n - 1):
        # After sorting, equal items are adjacent; keep the last of each run.
        if A[i] != A[i + 1]:
            res.append(A[i])
    # The final item always closes a run, so it is always kept.
    res.append(A[n - 1])
    return res

# you can also do it in-place
# O(nlogn) for sorting + O(n) for traversal = O(nlogn) time complexity (without amortization)
# O(1) space-complexity
def remove_dublicates_inplace(A):
    """Return the distinct items of A in sorted order.

    A is sorted in place, and the unique items are then compacted into
    the front of A without a separate result list. The returned value is
    a slice of A holding just those items. An empty input yields an
    empty result.
    """
    A.sort()
    n = len(A)
    if n == 0:
        # A[n-1] below would raise IndexError on an empty list.
        return A[:0]
    j = 0
    for i in range(n - 1):
        # After sorting, equal items are adjacent; keep the last of each run.
        if A[i] != A[i + 1]:
            A[j] = A[i]
            j += 1
    # The final item always closes a run, so it is always kept.
    A[j] = A[n - 1]
    return A[:j + 1]

# how it is done without playing the algorithm game
def remove_dublicates_obvious(A):
    """Return the distinct items of A as a list, using a set to drop repeats.

    The order of the returned items is whatever the set iteration yields.
    """
    distinct = set(A)
    return list(distinct)


### C-12.30

Modify our in-place quick-sort implementation of Code Fragment 12.6 to be a randomized version of the algorithm, as discussed in Section 12.3.1.

In [101]:
from random import randint

# requires a little change in the main quick sort function
# we choose a random pivot and then move it to the end 
# the rest is the same
def pivot_sort(A, start, end):
    """Partition A[start..end] (inclusive) in place around the pivot A[end].

    After the call, every item of the range that is <= the pivot lies to
    the left of the pivot and every larger item lies to its right.
    Items outside start..end are not touched.

    Returns the final index of the pivot.
    """
    pivot = A[end]
    # Next slot for an item <= pivot. It must begin at `start`, not 0,
    # so that only the requested sub-range is scanned and rearranged.
    leading_index = start
    for i in range(start, end):
        if A[i] <= pivot:
            A[i], A[leading_index] = A[leading_index], A[i]
            leading_index += 1
    # Drop the pivot between the two partitions.
    A[end], A[leading_index] = A[leading_index], A[end]
    return leading_index

def quick_sort(A, start, end):
    """Sort A[start..end] (inclusive) in place with randomized quick sort.

    A pivot position is drawn uniformly from start..end and swapped to
    the end of the range, after which the usual last-item partition is
    applied. Call as quick_sort(A, 0, len(A) - 1) to sort the whole list.

    Returns A for every input, including ranges with fewer than two
    items (end <= start), which need no work.
    """
    if end > start:
        pivot_index = randint(start, end)
        # Move the random pivot to the end, where pivot_sort expects it.
        A[pivot_index], A[end] = A[end], A[pivot_index]
        leading_index = pivot_sort(A, start, end)
        quick_sort(A, start, leading_index - 1)
        quick_sort(A, leading_index + 1, end)
    # Returned outside the `if` so trivial ranges do not yield None.
    return A


### C-12.41

Given a sequence ${S}$ of ${n}$ elements, on which a total order relation is defined, describe an efficient method for determining whether there are two equal elements in ${S}$. What is the running time of your method?

In [None]:
# if I understand the problem correctly, just determine if there are duplicates in an int A[]
# O(nlogn) for sorting + O(n) for traversal = O(nlogn) time-complexity 
def int_dublicates(A):
    """Return True if A contains at least two equal items, otherwise False.

    A is sorted in place as a side effect so that equal items become
    adjacent; a single pass over neighbouring pairs then finds any repeat.
    Sorting costs O(n log n) and the scan O(n).
    """
    n = len(A)
    if n < 2:
        # Fewer than two items cannot contain a pair of equal items.
        return False
    A.sort()
    for i in range(n - 1):
        if A[i] == A[i + 1]:
            return True
    return False
