In [2]:
from hypothesis import given
import hypothesis.strategies as st

# Sorting Algorithms

In this notebook I implement various sorting algorithms and test that they are correct using property-based testing as implemented by the [Hypothesis](https://hypothesis.readthedocs.io/en/latest/) library. I then compare the performance of my implementations with the Python standard library's built-in `sorted` function.

## Selection Sort

In [3]:
def _swap(sequence, i, j):
    """Swap element in position i with element in position j."""
    tmp = sequence[i]
    sequence[i] = sequence[j]
    sequence[j] = tmp


def selection_sort(sequence):
    """Sort sequence in place with selection sort.

    For each position, the first index holding the smallest remaining item
    is located and that item is swapped into the position. Returns None.
    """
    size = len(sequence)
    for start in range(size):
        # min() keeps the earliest index among equal minima, matching a
        # left-to-right scan that only moves on a strictly smaller item.
        smallest = min(range(start, size), key=lambda idx: sequence[idx])
        _swap(sequence, start, smallest)


In [11]:
@given(a=st.lists(st.integers()))
def test_selection_sort(a):
    """Property: after selection_sort(a), a equals the built-in sorted result."""
    expected = sorted(a)  # computed before a is sorted in place
    selection_sort(a)
    assert a == expected

In [12]:
test_selection_sort()

## Merge Sort

In [13]:
def _merge(sequence1, sequence2):
    """Merge two sequences."""
    merged = []
    while len(sequence1) > 0 and len(sequence2) > 0:
        b = sequence1[0]
        c = sequence2[0]
        if b <= c:
            sequence1 = sequence1[1:]
            merged.append(b)
        else:
            sequence2 = sequence2[1:]
            merged.append(c)
    return merged + sequence1 + sequence2


def merge_sort(sequence):
    """Return the items of sequence in sorted order using merge sort.

    The input is not modified. A sequence of length 0 or 1 is already
    sorted and is returned as-is (the same object); longer inputs produce
    a new list built by merging the two recursively sorted halves.
    """
    n = len(sequence)
    # The base case must include the empty sequence: slicing [] yields []
    # again, so treating only n == 1 as the base case never terminates.
    if n <= 1:
        return sequence
    m = n // 2
    left = merge_sort(sequence[:m])
    right = merge_sort(sequence[m:])
    return _merge(left, right)

In [14]:
@given(a=st.lists(st.integers()))
def test_merge_sort(a):
    """Property: merge_sort(a) equals the built-in sorted result for any integer list."""
    sorted_a = sorted(a)
    # Compare against the value computed above rather than sorting again.
    assert sorted_a == merge_sort(a)

## Quick Sort

In [33]:
import random


def _partition(sequence, l, r):
    """Partition sequence[l..r] (both ends inclusive) in place around sequence[l].

    Returns the index at which the pivot ends up. On return, every item in
    sequence[l:index] is <= the pivot and every item in
    sequence[index + 1:r + 1] is greater than it.
    """
    pivot = sequence[l]
    boundary = l  # last index of the "<= pivot" region built so far
    for k in range(l + 1, r + 1):
        if sequence[k] <= pivot:
            boundary += 1
            _swap(sequence, k, boundary)
    # Move the pivot from the front to its final slot.
    _swap(sequence, l, boundary)
    return boundary


def _quick_sort(sequence, l, r):
    if l >= r:
        return None
    else:
        p = random.randint(l, r)
        _swap(sequence, l, p)
        m = _partition(sequence, l, r)
        _quick_sort(sequence, l, m - 1)
        _quick_sort(sequence, m + 1, r)


def _tail_recursive_quick_sort(sequence, l, r):
    while l < r:
        p = random.randint(l, r)
        _swap(sequence, l, p)
        m = _partition(sequence, l, r)
        if (m - l) < (r - m):
            _tail_recursive_quick_sort(sequence, l, m - 1)
            l = m + 1
        else:
            _tail_recursive_quick_sort(sequence, m + 1, r)
            r = m - 1


def quick_sort(sequence):
    """Sort sequence in place using quick sort with a random pivot."""
    last = len(sequence) - 1
    _quick_sort(sequence, 0, last)

    

def tail_recursive_quick_sort(sequence):
    """Sort sequence in place using the loop-based quick sort variant."""
    last = len(sequence) - 1
    _tail_recursive_quick_sort(sequence, 0, last)


In [34]:
@given(a=st.lists(st.integers()))
def test_quick_sort(a):
    """Property: after quick_sort(a), a equals the built-in sorted result."""
    expected = sorted(a)  # computed before a is sorted in place
    quick_sort(a)
    assert a == expected

    
@given(a=st.lists(st.integers()))
def test_tail_recursive_quick_sort(a):
    """Property: after tail_recursive_quick_sort(a), a equals the built-in sorted result."""
    expected = sorted(a)  # computed before a is sorted in place
    tail_recursive_quick_sort(a)
    assert a == expected

In [35]:
test_quick_sort()

In [36]:
test_tail_recursive_quick_sort()

## Quick Sort 3

In [25]:
def _dijkstra_three_way_partitioning(sequence, l, r):
    """
    Return indices m1, m2 that partion a sequence into three segments.

    The first segment sequence[:m1] includes all of the elements that are
    strictly less than sequence[0]; second segment sequence[m1:m2+1] contains
    all of the elements of the sequence that are equal to sequence[0]; the
    final segment contains all the elements of the sequence that are strictly
    greater than sequence[0].

    """
    v = sequence[l]
    lt, gt = l, r
    i = lt
    while i <= gt:
        if sequence[i] < v:
            _swap(sequence, i, lt)
            lt += 1
            i += 1
        elif sequence[i] > v:
            _swap(sequence, i, gt)
            gt -= 1
        else:
            i += 1

    return lt, gt


def _quick_sort_3(sequence, l, r):
    if l >= r:
        return None
    else:
        p = random.randint(l, r)
        _swap(sequence, l, p)
        m1, m2 = _dijkstra_three_way_partitioning(sequence, l, r)
        _quick_sort_3(sequence, l, m1 - 1)
        _quick_sort_3(sequence, m2 + 1, r)


def _tail_recursive_quick_sort_3(sequence, l, r):
    """Sort sequence[l..r] (both ends inclusive) in place; returns None.

    Each pass moves a random pivot to the front and splits the range three
    ways into items less than, equal to, and greater than the pivot. The
    smaller of the two outer segments is sorted by a recursive call and the
    larger one by continuing the loop with narrowed bounds.
    """
    while l < r:
        p = random.randint(l, r)
        _swap(sequence, l, p)
        m1, m2 = _dijkstra_three_way_partitioning(sequence, l, r)
        # Items at indices m1..m2 equal the pivot and are already in their
        # final positions, so the range that remains must exclude all of
        # them. Keeping any of them in the loop range re-partitions equal
        # items and makes inputs with many duplicates quadratic.
        if (m1 - l) < (r - m2):
            _tail_recursive_quick_sort_3(sequence, l, m1 - 1)
            l = m2 + 1
        else:
            _tail_recursive_quick_sort_3(sequence, m2 + 1, r)
            r = m1 - 1


def quick_sort_3(sequence):
    """Sort sequence in place using quick sort with three-way partitioning."""
    last = len(sequence) - 1
    _quick_sort_3(sequence, 0, last)
    

def tail_recursive_quick_sort_3(sequence):
    """Sort sequence in place using the loop-based three-way quick sort variant."""
    last = len(sequence) - 1
    _tail_recursive_quick_sort_3(sequence, 0, last)

In [26]:
@given(a=st.lists(st.integers()))
def test_quick_sort_3(a):
    """Property: after quick_sort_3(a), a equals the built-in sorted result."""
    expected = sorted(a)  # computed before a is sorted in place
    quick_sort_3(a)
    assert a == expected
    

@given(a=st.lists(st.integers()))
def test_tail_recursive_quick_sort_3(a):
    """Property: after tail_recursive_quick_sort_3(a), a equals the built-in sorted result."""
    expected = sorted(a)  # computed before a is sorted in place
    tail_recursive_quick_sort_3(a)
    assert a == expected

In [24]:
test_quick_sort_3()

In [27]:
test_tail_recursive_quick_sort_3()

# Performance

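Selection Sort is $O(n^2)$ and should generally perform poorly relative to the alternatives for long sequences.  However, for short sequences it is possible for selection sort to outperform the alternatives. Note that every sort implemented here except `merge_sort` works in place, so once the first in-place sort has been timed `random_sequence` is already sorted and the later timings measure performance on sorted input.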

In [56]:
# 1000 integers drawn uniformly from [-100, 100]; duplicates are expected.
random_sequence = [random.randint(-100, 100) for _ in range(1000)]

In [57]:
%timeit sorted(random_sequence)

119 µs ± 418 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [58]:
%timeit selection_sort(random_sequence)

32.2 ms ± 708 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [59]:
%timeit merge_sort(random_sequence)

2.67 ms ± 54.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [60]:
%timeit quick_sort(random_sequence)

3.36 ms ± 7.99 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [61]:
%timeit tail_recursive_quick_sort(random_sequence)

3.41 ms ± 66.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [62]:
%timeit quick_sort_3(random_sequence)

2.35 ms ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [63]:
%timeit tail_recursive_quick_sort_3(random_sequence)

4.47 ms ± 13.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Numba

How much can [Numba](https://numba.pydata.org/) improve the performance of my Python implementation of Quick Sort? Significantly! Just adding the `@jit` decorator is enough for my Python implementation of Quick Sort to out-perform the standard library's implementation by a factor of 2x!

In [64]:
from numba import jit


@jit
def _swap(sequence, i, j):
    """Swap, in place, the elements of sequence at positions i and j.

    Numba-decorated redefinition: it rebinds the name ``_swap`` that the
    pure-Python cell earlier in the notebook defined.
    """
    tmp = sequence[i]
    sequence[i] = sequence[j]
    sequence[j] = tmp

@jit
def _dijkstra_three_way_partitioning(sequence, l, r):
    """
    Partition sequence[l..r] in place into three segments; return (m1, m2).

    The pivot is the value found at sequence[l] on entry. On return,
    sequence[l:m1] holds the elements strictly less than the pivot,
    sequence[m1:m2 + 1] holds the elements equal to the pivot, and
    sequence[m2 + 1:r + 1] holds the elements strictly greater than the
    pivot.

    Numba-decorated redefinition of the function of the same name defined
    earlier in the notebook.

    """
    v = sequence[l]
    lt, gt = l, r
    i = lt
    while i <= gt:
        if sequence[i] < v:
            _swap(sequence, i, lt)
            lt += 1
            i += 1
        elif sequence[i] > v:
            # The element swapped in from position gt has not been examined
            # yet, so i is not advanced here.
            _swap(sequence, i, gt)
            gt -= 1
        else:
            i += 1

    return lt, gt

@jit
def _quick_sort_3(sequence, l, r):
    """Sort sequence[l..r] (both ends inclusive) in place; returns None.

    A random pivot is swapped to position l, the range is partitioned three
    ways, and the strictly-less and strictly-greater segments are sorted
    recursively. Numba-decorated redefinition of the earlier function of
    the same name.
    """
    if l >= r:
        return None
    else:
        p = random.randint(l, r)
        _swap(sequence, l, p)
        m1, m2 = _dijkstra_three_way_partitioning(sequence, l, r)
        # Elements at m1..m2 equal the pivot and need no further sorting.
        _quick_sort_3(sequence, l, m1 - 1)
        _quick_sort_3(sequence, m2 + 1, r)

@jit
def quick_sort_3(sequence):
    """Sort sequence in place using three-way quick sort (Numba-decorated entry point)."""
    _quick_sort_3(sequence, 0, len(sequence) - 1)
    


In [65]:
# numba compilation improves on Python built-in by factor of 2!
%timeit quick_sort_3(random_sequence)

51.3 µs ± 4.28 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
