# Sorting Algorithms

In [27]:
import numpy as np
import sys

In [28]:
print(sys.version)
print(np.__name__.capitalize(), np.__version__)

3.6.2 |Anaconda custom (x86_64)| (default, Jul 20 2017, 13:14:59) 
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)]
Numpy 1.13.1


### A quick utility...

In [29]:
def gen_list(length, upper=100):
    return list(np.random.randint(0, upper, length))

## 1. Selection Sort

### Three Options

In [30]:
def sel_sort(list_in):
    '''Implements selection sort iteratively'''
    
    if len(list_in) == 1:
        return list_in

    ix = 0
    while ix < (len(list_in) - 1):
        min_val = list_in[ix]
        min_ix = ix
        for sub_ix, sub_min in enumerate(list_in[ix:], ix):
            if sub_min < min_val:
                min_val = sub_min
                min_ix = sub_ix
        if min_val != list_in[ix]:
            list_in[ix], list_in[min_ix] = list_in[min_ix], list_in[ix]
        ix += 1

#     for ix, n in enumerate(list_in[:-1]):
#         min_val = n
#         min_ix = ix
#         for ix_sub, n_sub in enumerate(list_in[ix + 1:], ix):
#             if n_sub < min_val:
#                 min_val = n_sub
#                 min_ix = ix_sub
#         if min_val != list_in[ix]:
#             list_in[ix], list_in[min_ix] = list_in[min_ix], list_in[ix]
            
    return list_in

In [31]:
def sel_sort2(list_in):
    '''Iterative selection sort using built-in min()'''
    
    if len(list_in) == 1:
        return list_in
    
    ix = 0
    while ix < (len(list_in) - 1):
        first = list_in[ix]
        min_val = min(list_in[ix:])
        if min_val < first:
            min_ix = list_in[ix:].index(min_val) + ix
            list_in[ix], list_in[min_ix] = list_in[min_ix], list_in[ix]
        ix += 1
        
    return list_in    

In [32]:
def sel_sort_rec(list_in):
    '''Implements selection sort recursively'''
    
    if len(list_in) == 1:
        return list_in
    else:
        min_val = list_in[0]
        min_ix = 0
        for ix, n in enumerate(list_in[:]):
            if n < min_val:
                min_val = n
                min_ix = ix
            # print("Min: [{}] {}".format(min_ix, min_val))
        if min_ix != list_in[0]:
            # print(f"Swapping {list_in[0]} and {list_in[min_ix]}")
            list_in[0], list_in[min_ix] = list_in[min_ix], list_in[0]
        # print(f"First: {list_in[:1]}")
        # print(f"Rest: {list_in[1:]}\n")
        return list_in[:1] + sel_sort_rec(list_in[1:])

In [106]:
foo = gen_list(20)
sel1 = sel_sort(foo[:])
sel2 = sel_sort2(foo[:])
selr = sel_sort_rec(foo[:])
print(sel1 == sel2 == selr)
print(sel1)
print(sel2)
print(selr)

True
[11, 18, 19, 26, 30, 31, 31, 33, 39, 49, 52, 53, 55, 55, 60, 64, 70, 75, 85, 97]
[11, 18, 19, 26, 30, 31, 31, 33, 39, 49, 52, 53, 55, 55, 60, 64, 70, 75, 85, 97]
[11, 18, 19, 26, 30, 31, 31, 33, 39, 49, 52, 53, 55, 55, 60, 64, 70, 75, 85, 97]


### Timing

#### 10-length list 

In [81]:
test = gen_list(10)
test

[7, 94, 38, 78, 38, 67, 57, 14, 8, 19]

In [82]:
%%timeit
sel_sort(test[:])

13.4 µs ± 708 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [83]:
%%timeit
sel_sort2(test[:])

12.2 µs ± 978 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [84]:
%%timeit
sel_sort_rec(test[:])

18.3 µs ± 1.9 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


#### 100-length list

In [102]:
test = gen_list(100)
len(test)

100

In [104]:
%%timeit
sel_sort(test[:])

495 µs ± 24.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [105]:
%%timeit
sel_sort2(test[:])

396 µs ± 12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [103]:
%%timeit
sel_sort_rec(test[:])

589 µs ± 25.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### 1000-length list

In [107]:
test = gen_list(1000)
len(test)

1000

In [109]:
%%timeit
sel_sort(test[:])

52.2 ms ± 5.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [110]:
%%timeit
sel_sort2(test[:])

31.2 ms ± 1.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [108]:
%%timeit
sel_sort_rec(test[:])

50.3 ms ± 3.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### 10000-length list

In [111]:
test = gen_list(10000)
len(test)

10000

In [112]:
%%timeit
sel_sort(test[:])

5.13 s ± 437 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [113]:
%%timeit
sel_sort2(test[:])

3.13 s ± 262 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [65]:
%%timeit
sel_sort_rec(test[:])

RecursionError: maximum recursion depth exceeded in comparison

Max recursion depth reached!!

#### 100000-length list

In [73]:
test = gen_list(100000)
len(test)

100000

In [74]:
# %%timeit
# sel_sort_rec(test[:])

In [None]:
# %%timeit
# sel_sort(test[:])

Takes a long, long time -- probably ~500s 

## 2. Insertion Sort

In [33]:
def ins_sort(list_in):
    '''Iterative insertion sort'''
    
    if len(list_in) == 1:
        return list_in
    
    for ix, n in enumerate(list_in):
        for ix_sub, n_sub in enumerate(list_in[:ix][::-1]):
            ix_real = ix - 1 - ix_sub
            if n < n_sub:
                list_in[ix_real], list_in[ix_real + 1] = n, n_sub
            else:
                break
                
    return list_in

In [56]:
test_list = gen_list(20)
test_list

[61, 65, 75, 72, 92, 55, 31, 9, 39, 51, 2, 61, 55, 90, 91, 82, 14, 13, 17, 43]

In [57]:
ins_result = ins_sort(test_list[:])
sel_result = sel_sort(test_list[:])
print(ins_result == sel_result)
print(ins_result)
print(sel_result)

True
[2, 9, 13, 14, 17, 31, 39, 43, 51, 55, 55, 61, 61, 65, 72, 75, 82, 90, 91, 92]
[2, 9, 13, 14, 17, 31, 39, 43, 51, 55, 55, 61, 61, 65, 72, 75, 82, 90, 91, 92]


## 3. Merge Sort

In [18]:
def linear_merge(left, right):
    '''Merges two sorted lists into one'''
    
    # edges & checks -- how to deal with zero-length list?
    # assert len(left) + len(right) > 0, "Cannot merge two empty lists"
    if len(left) == 0:
        return right
    if len(right) == 0:
        return left

    # store variables
    l_ix = 0
    r_ix = 0
    out = []
    
    # algo
    while l_ix < len(left) and r_ix < len(right):
        if left[l_ix] <= right[r_ix]:
            out.append(left[l_ix])
            l_ix += 1
        else:
            out.append(right[r_ix])
            r_ix += 1
    
    return out + left[l_ix:] + right[r_ix:]

In [25]:
def merge_sort(list_in):
    '''Recursive merge sort on list or tuple'''
    
    # edges, assertions
    assert isinstance(list_in, (list, tuple))

    # base case
    if len(list_in) <= 1:
        return list_in
    
    # algo
    split_ix = len(list_in) // 2
    left = merge_sort(list_in[:split_ix])
    right = merge_sort(list_in[split_ix:])
    return linear_merge(left, right)

In [76]:
test_list = gen_list(20)
print(test_list)

[73, 27, 18, 86, 37, 71, 96, 21, 1, 70, 57, 82, 42, 62, 49, 59, 10, 69, 2, 71]


In [77]:
merge_sort(test_list[:]) == sorted(test_list)

True

In [78]:
merge_sort(test_list)

[1, 2, 10, 18, 21, 27, 37, 42, 49, 57, 59, 62, 69, 70, 71, 71, 73, 82, 86, 96]

In [79]:
sorted(test_list)

[1, 2, 10, 18, 21, 27, 37, 42, 49, 57, 59, 62, 69, 70, 71, 71, 73, 82, 86, 96]

In [80]:
test_list

[73, 27, 18, 86, 37, 71, 96, 21, 1, 70, 57, 82, 42, 62, 49, 59, 10, 69, 2, 71]

## 4. Timing

Compare all methods with built-in `sorted()` as baseline (iterative selection sortdoesn't use built-in `min()`).

#### 10-length list 

In [85]:
test = gen_list(10)
test

[76, 33, 65, 24, 55, 20, 31, 91, 42, 31]

In [86]:
%%timeit
sel_sort(test[:])

16.7 µs ± 2.19 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [87]:
%%timeit
ins_sort(test[:])

15.6 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [88]:
%%timeit
merge_sort(test[:])

35.4 µs ± 3.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [89]:
%%timeit
sorted(test[:])

1.22 µs ± 48 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


#### 100-length list 

In [90]:
test = gen_list(100)
len(test)

100

In [91]:
%%timeit
sel_sort(test[:])

504 µs ± 50.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [92]:
%%timeit
ins_sort(test[:])

572 µs ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [93]:
%%timeit
merge_sort(test[:])

493 µs ± 55.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [94]:
%%timeit
sorted(test[:])

19.1 µs ± 586 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


#### 1000-length list 

In [95]:
test = gen_list(1000)
len(test)

1000

In [96]:
%%timeit
sel_sort(test[:])

51.4 ms ± 5.98 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [97]:
%%timeit
ins_sort(test[:])

77.1 ms ± 5.82 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [98]:
%%timeit
merge_sort(test[:])

6.56 ms ± 598 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [99]:
%%timeit
sorted(test[:])

310 µs ± 13.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### 10000-length list 

In [100]:
test = gen_list(10000)
len(test)

10000

In [101]:
%%timeit
sel_sort(test[:])

5.27 s ± 341 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [102]:
%%timeit
ins_sort(test[:])

8.35 s ± 441 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [103]:
%%timeit
merge_sort(test[:])

85.3 ms ± 12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [104]:
%%timeit
sorted(test[:])

3.71 ms ± 183 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Merge sort gains speed with a 100-length list and then proceeds to leave selection and insertion in the dust. Thank you, O(n log n). However, built-in `sorted()` is over 20x faster in all cases.