* Visualization of these concepts: https://visualgo.net/en

* https://www.geeksforgeeks.org/ - many implementations

## Heap sort
* Selection sort where we utilize a better structure for storing max(or min values)
* https://www.cs.usfca.edu/~galles/visualization/Heap.html

* https://www.cs.usfca.edu/~galles/visualization/HeapSort.html

In [None]:
## Heap sort is a selection algorithm
# we go through our iterable and select the min or max value out of the unsorted
# the only improvement is that we use a more efficient data structure for storing the min or max values(we have to pick side)
# the problem was that we had to go through all the unsorted values constantly and look for that min or max
# that's where O(n^2) comes from

In [None]:
## https://www.cs.usfca.edu/~galles/visualization/Heap.html - heap data structure visualization

In [17]:
import heapq
def simple_heap_sort(iterable):
    heapq.heapify(iterable) # guaranteed linear time by the library but it is in place
    return [heapq.heappop(iterable) for i in range(len(iterable))] # so no IndexErrors 
# complexity comes from the single [heapq.heappop(iterable) for i in range(len(iterable))]

In [18]:
simple_heap_sort([1,34,6,21,6,1,21,656,6,2,7,0,-33,-2,5])

[-33, -2, 0, 1, 1, 2, 5, 6, 6, 6, 7, 21, 21, 34, 656]

In [42]:
import random
r10k = [random.randint(1,1_000_000) for n in range(10_000)]
r100k = [random.randint(1,1_000_000) for n in range(100_000)]
r1m = [random.randint(1,10_000_000) for n in range(1_000_000)]

In [21]:
# we could use already provided heap data structure in many languages
# in Python https://docs.python.org/3/library/heapq.html
import heapq
def heapsort(iterable):
    h = []
    for value in iterable:
         heapq.heappush(h, value)
    return [heapq.heappop(h) for i in range(len(h))]

In [22]:
heapsort([1,3,6,21,2,3,67,-3,7])

[-3, 1, 2, 3, 3, 6, 7, 21, 67]

In [23]:
nlist = heapsort(r100k)
nlist[:10]

[13, 25, 38, 53, 93, 124, 128, 132, 134, 139]

In [24]:
r100k[:5]

[511231, 657295, 739986, 163740, 8909]

In [25]:
%%timeit
heapsort(r100k)

106 ms ± 5.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [27]:
%%timeit
heapsort(r1m)

1.78 s ± 34.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
%%timeit
heapsort(r10k)

7.77 ms ± 465 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [29]:
%%timeit
sorted(r100k)

25.6 ms ± 1.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [30]:
%%timeit
sorted(r1m)

365 ms ± 7.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
# Python program for implementation of heap Sort - GeeksForGeeks Version
 
# To heapify subtree rooted at index i.
# n is size of heap
 
 
def heapify(arr, n, i):
    largest = i  # Initialize largest as root
    l = 2 * i + 1     # left = 2*i + 1
    r = 2 * i + 2     # right = 2*i + 2
 
    # See if left child of root exists and is
    # greater than root
    if l < n and arr[largest] < arr[l]:
        largest = l
 
    # See if right child of root exists and is
    # greater than root
    if r < n and arr[largest] < arr[r]:
        largest = r
 
    # Change root, if needed
    if largest != i:
        arr[i], arr[largest] = arr[largest], arr[i]  # swap
 
        # Heapify the root.
        heapify(arr, n, largest)
 
# The main function to sort an array of given size
 
 
def heap_sort_geeks(arr):
    n = len(arr)
 
    # Build a maxheap.
    for i in range(n//2 - 1, -1, -1):
        heapify(arr, n, i)
 
    # One by one extract elements
    for i in range(n-1, 0, -1):
        arr[i], arr[0] = arr[0], arr[i]  # swap
        heapify(arr, i, 0)
 
 
# Driver code
arr = [12, 11, 13, 5, 6, 7]
heapSort(arr)
n = len(arr)
print("Sorted array is")
for i in range(n):
    print("%d" % arr[i]),
# This code is contributed by Mohit Kumra

Sorted array is
5
6
7
11
12
13


In [33]:
%%timeit
heap_sort_geeks(r10k)

147 ms ± 4.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [34]:
%%timeit
heap_sort_geeks(r100k)

1.89 s ± 77.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


![Heap sort](https://upload.wikimedia.org/wikipedia/commons/1/1b/Sorting_heapsort_anim.gif)

# Quick Sort

![QuickSort](https://upload.wikimedia.org/wikipedia/commons/6/6a/Sorting_quicksort_anim.gif)

#### Hungarian Dance version - https://www.youtube.com/watch?v=ywWBy6J5gz8

In [None]:
# So quick sort algorithm
# choose a pivot (some value)
# partion values - those smaller go left, those bigger go right
# then apply quicksort to these subdivisions

#eventually there are nothing to partition and we are done!

![quick](../imgs/quicksort.png)

$$Complexity: O(nlog(n))$$ $$Worst case : O(n^2)$$

In [35]:
def quickSort(alist):
    quickSortHelper(alist, 0, len(alist) - 1)


def quickSortHelper(alist, first, last):
    if first < last:

        splitpoint = partition(alist, first, last)

        quickSortHelper(alist, first, splitpoint - 1)
        quickSortHelper(alist, splitpoint + 1, last)


def partition(alist, first, last):
    pivotvalue = alist[first]

    leftmark = first + 1
    rightmark = last

    done = False
    while not done:

        while leftmark <= rightmark and alist[leftmark] <= pivotvalue:
            leftmark = leftmark + 1

        while alist[rightmark] >= pivotvalue and rightmark >= leftmark:
            rightmark = rightmark - 1

        if rightmark < leftmark:
            done = True
        else:
            temp = alist[leftmark]
            alist[leftmark] = alist[rightmark]
            alist[rightmark] = temp

    temp = alist[first]
    alist[first] = alist[rightmark]
    alist[rightmark] = temp

    return rightmark


alist = [54, 26, 93, 17, 77, 31, 44, 55, 20]
quickSort(alist)
print(alist)

[17, 20, 26, 31, 44, 54, 55, 77, 93]


In [37]:
def naive_quicksort(it):
    if len(it) <= 1: # it is possible to get an empty list/array
        return it
    pivot = it[0] # random would be even better
    # so partitioning will be 2 linear runs and also will take extra memory unlike optimized quicksort
    left = [n for n in it if n < pivot]  # assuming no duplicates
    right = [n for n in it if n > pivot] 
    return naive_quicksort(left) + [pivot] + naive_quicksort(right)

In [None]:
# so quicksort reccurence would be
T(n) = 2T(n/2) + n # soT(n/2) would be average case 
# worst case would be with pivots at the wrong end (not middle)
T(n) = T(1) + T(n-1) + n # which lead so quadratic complexity
# this could in real life scenario if you were applying naive quicksort to reversely ordered list

In [38]:
naive_quicksort([54, 26, 93, 17, 77, 31, 44, 55, 20])

[17, 20, 26, 31, 44, 54, 55, 77, 93]

In [43]:
r10k[:5]

[613029, 52333, 449512, 371646, 883629]

In [44]:
r100k[:5]

[912105, 490927, 804951, 879203, 126615]

In [45]:
r1m[:5]

[2879817, 533953, 711291, 1660192, 8329680]

In [46]:
%%timeit
naive_quicksort(r10k)

36.7 ms ± 747 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [47]:
%%timeit
naive_quicksort(r100k)

461 ms ± 6.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [48]:
%%timeit
naive_quicksort(r1m)

7.68 s ± 782 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Turns out it can be proven (see Cormen- CLRS) that n log n is the best we can do for sorting algorithms which involve
# comparisons

In [None]:
# well those are so called bucket sorts, you have counting sort, radix sort, bucket sort
# these will have O(n compentt + some k some other component) - k being something to do with data

In [49]:
# Python program for counting sort - GeeksForGeeks
 
# The main function that sort the given string arr[] in 
# alphabetical order
def countSort(arr):
 
    # The output character array that will have sorted arr
    output = [0 for i in range(len(arr))]
 
    # Create a count array to store count of inidividul
    # characters and initialize count array as 0
    count = [0 for i in range(256)]
 
    # For storing the resulting answer since the 
    # string is immutable
    ans = ["" for _ in arr]
 
    # Store count of each character
    for i in arr:
        count[ord(i)] += 1
 
    # Change count[i] so that count[i] now contains actual
    # position of this character in output array
    for i in range(256):
        count[i] += count[i-1]
 
    # Build the output character array
    for i in range(len(arr)):
        output[count[ord(arr[i])]-1] = arr[i]
        count[ord(arr[i])] -= 1
 
    # Copy the output array to arr, so that arr now
    # contains sorted characters
    for i in range(len(arr)):
        ans[i] = output[i]
    return ans 
 
# Driver program to test above function
arr = "geeksforgeeks"
ans = countSort(arr)
print("Sorted character array is % s" %("".join(ans)))
 
# This code is contributed by Nikhil Kumar Singh

Sorted character array is eeeefggkkorss


In [51]:
ans = countSort("Valdis teaching at RTU")
"".join(ans)

'   RTUVaaacdeghiilnstt'

In [53]:
# https://en.wikipedia.org/wiki/Radix_sort
# Python program for implementation of Radix Sort 
# A function to do counting sort of arr[] according to 
# the digit represented by exp. 
  
def countingSort(arr, exp1): 
  
    n = len(arr) 
  
    # The output array elements that will have sorted arr 
    output = [0] * (n) 
  
    # initialize count array as 0 
    count = [0] * (10) 
  
    # Store count of occurrences in count[] 
    for i in range(0, n): 
        index = (arr[i] / exp1) 
        count[int(index % 10)] += 1
  
    # Change count[i] so that count[i] now contains actual 
    # position of this digit in output array 
    for i in range(1, 10): 
        count[i] += count[i - 1] 
  
    # Build the output array 
    i = n - 1
    while i >= 0: 
        index = (arr[i] / exp1) 
        output[count[int(index % 10)] - 1] = arr[i] 
        count[int(index % 10)] -= 1
        i -= 1
  
    # Copying the output array to arr[], 
    # so that arr now contains sorted numbers 
    i = 0
    for i in range(0, len(arr)): 
        arr[i] = output[i] 
  
# Method to do Radix Sort 
def radixSort(arr): 
  
    # Find the maximum number to know number of digits 
    max1 = max(arr) 
  
    # Do counting sort for every digit. Note that instead 
    # of passing digit number, exp is passed. exp is 10^i 
    # where i is current digit number 
    exp = 1
    while max1 / exp > 0: 
        countingSort(arr, exp) 
        exp *= 10
  
  
# Driver code 
arr = [170, 45, 75, 90, 802, 24, 2, 66] 
  
# Function Call 
radixSort(arr) 
  

print(arr)
  
# This code is contributed by Mohit Kumra 
# Edited by Patrick Gallagher 

[2, 24, 45, 66, 75, 90, 170, 802]


In [55]:
r1k = [random.randint(1,10_000) for n in range(1_000)]
r1k[:5]

[6541, 3499, 5590, 5432, 1233]

In [56]:
%%timeit
radixSort(r1k)

802 ms ± 16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
