### Combined algorithm

In [165]:
import pandas as pd
import numpy as np

In [107]:
# Code from L02

def insertion_sort(A):
    for j in range(1, len(A)):
        key = A[j]
        i = j - 1
        while i >= 0 and A[i] > key:
            A[i + 1] = A[i]
            i = i - 1
        A[i + 1] = key
    return A

In [108]:
# Code from L03

def merge(A, p, q, r):    
    n1 = q - p + 1
    n2 = r - q
    
    L = [0] * n1
    R = [0] * n2

    for i in list(range(n1)):
        L[i] = A[p + i - 1]
    
    for j in list(range(n2)):
        R[j] = A[q + j]
    L.append(float('inf'))
    R.append(float('inf'))

    i = 1 - 1     # Subtract 1 to adjust to Python indexing
    j = 1 - 1     # Subtract 1 to adjust to Python indexing
    
    for k in list(range(p - 1, r)):     # Subtract 1 from q to adjust to Python range object
        if L[i] <= R[j]:
            A[k] = L[i]
            i = i + 1
        else:
            A[k] = R[j]
            j = j + 1
    return A

In [109]:
# See p 34 book for algo
# Note to self, remember correct indexing. 
def merge_sort(A, p, r):
    if p < r:
        q = (p + r) // 2
        merge_sort(A, p, q)
        merge_sort(A, q + 1, r)
        merge(A, p, q, r)
    return A

In [126]:
def combined_sort(A, p=1, n=10):
    """
    Combined algorithm mergesort switching to insertion sort for small data
    """
    if len(A) < n:
        insertion_sort(A)
    else:
        merge_sort(A, p, len(A))
    return A

### Benchmarking

In [171]:
benchmark_results_combined = pd.DataFrame(columns = 
                                          ['input ordering', 'input size',
                                           'run number','sorting algorithm',
                                           'time'])

In [172]:
# Source TP 2019

for input_ordering in ['sorted','reversed','random','identical']:
    
    for input_size in range(1,4):
        rng = np.random.default_rng(12)
        test_data = np.random.uniform(size=10**input_size)
        
        if input_ordering == 'sorted':
            test_data = sorted(test_data)
        
        elif input_ordering == 'reversed':
            test_data = list(reversed(sorted(test_data)))
        
        elif input_ordering == 'identical':
            test_data = [test_data]*0
            
        clock = timeit.Timer(stmt='sort_func(copy(data))',
                     globals={'sort_func': combined_sort,
                              'data': test_data,
                              'copy': copy.copy})
        n_ar, t_ar = clock.autorange()
        t = clock.repeat(repeat=5, number=n_ar)  
        
        print(f"Combined sort minimum time on {input_ordering} \
              data of size 10^{input_size}:", min(t)/n_ar)
        
        
        for run_number in range(5):
            benchmark_results_combined = \
                                benchmark_results_combined.append(
                                {'input ordering': input_ordering,                        
                                 'input size':10**input_size,
                                  'run number':run_number+1,
                                  'sorting algorithm':'combined_sort',
                                  'time':t[run_number]/n_ar},
                                     ignore_index=True)

Combined sort minimum time on sorted               data of size 10^1: 4.0502140000171496e-05
Combined sort minimum time on sorted               data of size 10^2: 0.0005105866000012611
Combined sort minimum time on sorted               data of size 10^3: 0.007146997999989253
Combined sort minimum time on reversed               data of size 10^1: 4.343642000021646e-05
Combined sort minimum time on reversed               data of size 10^2: 0.0005580190000000584
Combined sort minimum time on reversed               data of size 10^3: 0.007087903999999981
Combined sort minimum time on random               data of size 10^1: 5.48356800001784e-05
Combined sort minimum time on random               data of size 10^2: 0.0006896444000012707
Combined sort minimum time on random               data of size 10^3: 0.009121948000029079
Combined sort minimum time on identical               data of size 10^1: 9.42202999995061e-07
Combined sort minimum time on identical               data of size 10^2: 9.

In [173]:
benchmark_results_combined

Unnamed: 0,input ordering,input size,run number,sorting algorithm,time
0,sorted,10,1,combined_sort,4.083596e-05
1,sorted,10,2,combined_sort,4.322782e-05
2,sorted,10,3,combined_sort,4.40046e-05
3,sorted,10,4,combined_sort,4.050214e-05
4,sorted,10,5,combined_sort,5.15924e-05
5,sorted,100,1,combined_sort,0.0005105866
6,sorted,100,2,combined_sort,0.0005332646
7,sorted,100,3,combined_sort,0.0005702426
8,sorted,100,4,combined_sort,0.0006293724
9,sorted,100,5,combined_sort,0.000588441


In [174]:
benchmark_results_combined.to_pickle("./benchmark_results_combined.pkl")

In [175]:
pd.read_pickle("./benchmark_results_combined.pkl")

Unnamed: 0,input ordering,input size,run number,sorting algorithm,time
0,sorted,10,1,combined_sort,4.083596e-05
1,sorted,10,2,combined_sort,4.322782e-05
2,sorted,10,3,combined_sort,4.40046e-05
3,sorted,10,4,combined_sort,4.050214e-05
4,sorted,10,5,combined_sort,5.15924e-05
5,sorted,100,1,combined_sort,0.0005105866
6,sorted,100,2,combined_sort,0.0005332646
7,sorted,100,3,combined_sort,0.0005702426
8,sorted,100,4,combined_sort,0.0006293724
9,sorted,100,5,combined_sort,0.000588441
