# Algorithms

## Quadratic Algorithms

### InsertionSort

In [17]:
# Code from L02

def insertion_sort(A):
    """
    Sort A in place with insertion sort and return it.

    Parameters
    ----------
    A : array
        Numbers to be sorted
    """
    for right in range(1, len(A)):
        current = A[right]
        slot = right
        # Shift every larger element of the sorted prefix one step to the
        # right until the slot where `current` belongs is free.
        while slot > 0 and A[slot - 1] > current:
            A[slot] = A[slot - 1]
            slot -= 1
        A[slot] = current
    return A

### BubbleSort

In [18]:
def bubble_sort(array):
    """
    Sort array in place with bubble sort and return it.

    Each outer pass bubbles the smallest remaining element down to
    position j, so len(array) - 1 passes are needed to place every element.

    Parameters
    ----------
    array : array
        Numbers to be sorted
    """
    n = len(array)
    # n - 1 passes: after pass j the elements at positions 0..j are final.
    # (One pass fewer leaves the last two elements potentially unsorted.)
    for j in range(n - 1):
        for k in range(n - 1, j, -1):
            if array[k] < array[k - 1]:
                array[k], array[k - 1] = array[k - 1], array[k]
    return array

## Sub-quadratic Algorithms

### MergeSort

In [19]:
# Code from L03

def merge(A, p, q, r):
    """
    Merge the adjacent sorted runs A[p..q] and A[q+1..r] in place.

    The indices p, q and r are 1-based and inclusive; they are translated
    to Python's 0-based indexing inside this function.

    The merge does not rely on infinity sentinels, so it also works when the
    data itself contains ``float('inf')`` or is not numeric (e.g. strings).

    Parameters
    ----------
    A : array
        Sequence holding the two sorted runs; modified in place.
    p : int
        1-based index of the first element of the left run.
    q : int
        1-based index of the last element of the left run.
    r : int
        1-based index of the last element of the right run.

    Returns
    -------
    A : array
        The same object that was passed in.
    """
    # Copy both runs out of A (list() forces a real copy even when A is a
    # NumPy array, whose slices would otherwise be views onto A).
    left = list(A[p - 1:q])
    right = list(A[q:r])

    i = 0
    j = 0
    k = p - 1     # 0-based position in A that receives the next element

    while i < len(left) and j < len(right):
        # `<=` keeps equal elements in their original order (stable merge)
        if left[i] <= right[j]:
            A[k] = left[i]
            i += 1
        else:
            A[k] = right[j]
            j += 1
        k += 1

    # At most one run still has elements; copy them behind the merged part.
    for value in left[i:]:
        A[k] = value
        k += 1
    for value in right[j:]:
        A[k] = value
        k += 1
    return A

In [20]:
# See page 34 of the book for the algorithm
def _merge_sort(A, p, r):
    if p < r:
        q = (p + r) // 2
        _merge_sort(A, p, q)
        _merge_sort(A, q + 1, r)
        merge(A, p, q, r)
    return A

In [21]:
def merge_sort(A):
    """
    Sort A in place with merge sort and return it.

    Single-argument wrapper around _merge_sort, so the algorithm can be
    handed to the benchmarking Timer, which calls it with the data only.

    Parameters
    ----------
    A : array
        Numbers to be sorted
    """
    # Sort the whole array: positions 1 through len(A).
    _merge_sort(A, 1, len(A))
    return A
        

### QuickSort

In [22]:
# Code from L07

def partition(array, low, high):
    """
    Partition array[low..high] in place around the pivot array[high].

    Elements less than or equal to the pivot are moved in front of it.
    Returns the index at which the pivot ends up.
    """
    pivot_value = array[high]
    boundary = low  # next slot for an element that is <= pivot
    for scan in range(low, high):
        if array[scan] <= pivot_value:
            array[boundary], array[scan] = array[scan], array[boundary]
            boundary += 1
    # Put the pivot between the two groups
    array[boundary], array[high] = array[high], array[boundary]
    return boundary


def quick_sort(array, low=0, high=None):
    """
    Sort array[low..high] in place using the quicksort algorithm.

    Only the smaller side of each partition is handled by a recursive call;
    the larger side is processed by the loop. This bounds the recursion
    depth by O(log n), so already sorted or reversed input no longer hits
    Python's recursion limit (the running time on such input is still
    quadratic with this pivot choice).

    Parameters
    ----------
    array : array
        Numbers to be sorted; modified in place.
    low : int, optional
        Index of the first element of the range. Default is 0.
    high : int, optional
        Index of the last element of the range. Default is the last index.

    Returns
    -------
    array : array
        The same object that was passed in.
    """
    if high is None:
        high = len(array) - 1
    while low < high:
        part = partition(array, low, high)
        if part - low < high - part:
            # Left side is smaller: recurse into it, keep looping on the right
            quick_sort(array, low, part - 1)
            low = part + 1
        else:
            # Right side is smaller (or equal): recurse into it, loop on left
            quick_sort(array, part + 1, high)
            high = part - 1
    return array

## Combined Algorithms

### MergeSort switching to InsertionSort for small data

In [23]:
def combined_sort(A, p=1, n=100):
    """
    Combined algorithm: merge sort switching to insertion sort for small data

    The threshold is applied at every level of the recursion, so sub-arrays
    with fewer than n elements are sorted with insertion sort instead of
    being split further. (Checking the threshold only once for the whole
    input would make every input of n or more elements a plain merge sort.)

    Parameters
    ----------

    A : array
        Numbers to be sorted; modified in place.

    p : int
        Start index (1-based). Default=1 for sorting the entire array.

    n : int
        Threshold value when the function shifts sorting algorithm:
        ranges with fewer than n elements are sorted by insertion sort.

    Returns
    -------
    A : array
        The same object that was passed in.
    """

    def _insertion_sort_range(lo, hi):
        # Insertion sort of the 1-based inclusive range lo..hi of A
        first = lo - 1  # 0-based index of the first element of the range
        for j in range(lo, hi):
            key = A[j]
            i = j - 1
            while i >= first and A[i] > key:
                A[i + 1] = A[i]
                i -= 1
            A[i + 1] = key

    def _sort(lo, hi):
        # Sort the 1-based inclusive range lo..hi of A
        if lo >= hi:
            return
        if hi - lo + 1 < n:
            _insertion_sort_range(lo, hi)
            return
        mid = (lo + hi) // 2
        _sort(lo, mid)
        _sort(mid + 1, hi)
        merge(A, lo, mid, hi)

    if len(A) < n:
        # Small input: insertion sort on the whole array
        insertion_sort(A)
    else:
        _sort(p, len(A))
    return A

## Built-in sorting functions

## Python 'sorted()'

In [24]:
sorted

<function sorted(iterable, /, *, key=None, reverse=False)>

## NumPy 'sort()'

In [25]:
import numpy as np
np.sort

<function numpy.sort(a, axis=-1, kind=None, order=None)>

In [26]:
# Unsorted sample list of integers (contains negatives and a duplicate 0)
array = [0, -5, 4, 8, 2, -55, 1, 0, -856, 94, 9, 78, 5, -84, 568]


# Benchmarking

In [27]:
import numpy as np
import pandas as pd
import timeit
import copy
import os.path

In [28]:
def run_benchmark(sort_func,
                  input_base,
                  input_power,
                  seed=None,
                  save=True,
                  num_runs=5):
    """
    Run benchmark with given parameters

    For each input order ('sorted', 'reversed', 'random') and each exponent
    p in 0..input_power, an array of input_base**p uniform random numbers is
    generated and sort_func is timed on a copy of it with timeit. The
    per-call times of the first num_runs repetitions are recorded.

    Parameters
    ----------------------------------------------------------------------
    sort_func: function
               Algorithm to use for sorting. Called with the data as its
               only argument.
    input_base: int
                Raise this number to each power 0..input_power to
                determine the data sizes
    input_power: int
                 The largest power of the data size
    seed: int, optional
          Seed for the random number generator. Default is None.
    save: bool, optional
          Saving results to file. Default is True.
    num_runs: int, optional
              Number of runs recorded for each test case
    ----------------------------------------------------------------------

    Returns
    -------
    results: pandas.DataFrame
             One row per recorded run with the columns 'input order',
             'input size', 'run number', 'sorting algorithm' and 'time'.
    """
    columns = [
        'input order', 'input size', 'run number', 'sorting algorithm', 'time'
    ]
    # Rows are collected in a plain list and turned into a data frame
    # afterwards; DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    results = pd.DataFrame(columns=columns)

    # At least 7 repetitions are timed (the minimum is reported from them);
    # more are timed when more runs are requested, so that every recorded
    # run has a measurement.
    num_repeats = max(7, num_runs)

    for order in ['sorted', 'reversed', 'random']:
        for p in range(input_power + 1):
            size = input_base**p

            # Skip the cases where quick_sort on presorted data is expected
            # to exceed the recursion limit.
            quicksort_recursion_limit = (
                sort_func == quick_sort
                and order in ('sorted', 'reversed')
                and ((input_base == 10 and p > 3)
                     or (input_base == 2 and p > 11)))
            print(sort_func, order, input_base, p, num_runs)

            if quicksort_recursion_limit:
                continue

            # Generate random data (re-seeded for every test case)
            rng = np.random.default_rng(seed)
            test_data = rng.uniform(size=size)

            # Presorting
            # NOTE(review): sorted() returns a list while the 'random' case
            # stays a NumPy array - confirm this difference is intended.
            if order == 'sorted':
                test_data = sorted(test_data)
            elif order == 'reversed':
                test_data = sorted(test_data, reverse=True)

            # Timer function; the data is copied for every call so that an
            # in-place sort never sees already sorted input from a prior call
            clock = timeit.Timer(stmt='sort_func(copy(data))',
                                 globals={
                                     'sort_func': sort_func,
                                     'data': test_data,
                                     'copy': copy.copy
                                 })
            n_ar, _ = clock.autorange()
            t = clock.repeat(repeat=num_repeats, number=n_ar)

            # Print out the minimum time per call for each data size
            print(
                f"Minimum time(s) on {order} data of size "
                f"{size}:",
                np.min(t) / n_ar)

            for run_number in range(num_runs):
                rows.append({
                    'input order': order,
                    'input size': size,
                    'run number': run_number + 1,
                    'sorting algorithm': sort_func.__name__,
                    'time': t[run_number] / n_ar,
                })

        results = pd.DataFrame(rows, columns=columns)
        if save:
            # Save pickled data frame to file in data directory. This runs
            # after every input order and overwrites the same file with the
            # cumulative results, so an interrupted run still leaves the
            # completed orders on disk.
            directory = '../data/'
            filename = '{0}_n{1}^{2}.pkl'.format(sort_func.__name__, input_base, input_power)
            file_path = os.path.join(directory, filename)
            os.makedirs(directory, exist_ok=True)
            results.to_pickle(file_path)
            print()
            print(f'Saved to path: {file_path}')

    return results

In [29]:
# from benchmarking import run_benchmark
import numpy as np
import os

In [30]:
# Maps a display title to the sorting function to benchmark.
# Entries that are commented out are not part of the dict and are
# therefore skipped by any loop over it; uncomment an entry to include it.
sorting_functions = {
#     'Quick Sort': quick_sort,
#     'Insertion Sort': insertion_sort,
    'Bubble Sort': bubble_sort,
#     'Merge Sort': merge_sort,
#     'Combined Sort': combined_sort,
#     'Python Sorted': sorted,
#     'NumPy Sort': np.sort,
}

In [None]:
# Benchmark every function in sorting_functions on data sizes 10**0 .. 10**8.
# NOTE(review): the largest sizes may be impractically slow for quadratic
# algorithms - confirm input_power before running.
for title in sorting_functions:
    sort = sorting_functions[title]
    run_benchmark(sort, input_base=10, input_power=8, seed=12)

<function bubble_sort at 0x000002BE64950318> sorted 10 0 5
Minimum time(s) on sorted data of size 1: 7.990535999999793e-07
<function bubble_sort at 0x000002BE64950318> sorted 10 1 5
Minimum time(s) on sorted data of size 10: 9.084247999999775e-06
<function bubble_sort at 0x000002BE64950318> sorted 10 2 5
Minimum time(s) on sorted data of size 100: 0.0005833106000000043
<function bubble_sort at 0x000002BE64950318> sorted 10 3 5
Minimum time(s) on sorted data of size 1000: 0.0633226800000017
<function bubble_sort at 0x000002BE64950318> sorted 10 4 5
Minimum time(s) on sorted data of size 10000: 9.26716549999999
<function bubble_sort at 0x000002BE64950318> sorted 10 5 5
