In [10]:
import pandas as pd
import numpy as np
import timeit
import copy

In [1]:
# Code from L03

def merge(A, p, q, r):    
    n1 = q - p + 1
    n2 = r - q
    
    L = [0] * n1
    R = [0] * n2

    for i in list(range(n1)):
        L[i] = A[p + i - 1]
    
    for j in list(range(n2)):
        R[j] = A[q + j]
    L.append(float('inf'))
    R.append(float('inf'))

    i = 1 - 1     # Subtract 1 to adjust to Python indexing
    j = 1 - 1     # Subtract 1 to adjust to Python indexing
    
    for k in list(range(p - 1, r)):     # Subtract 1 from q to adjust to Python range object
        if L[i] <= R[j]:
            A[k] = L[i]
            i = i + 1
        else:
            A[k] = R[j]
            j = j + 1
    return A

In [18]:
# See p 34 book for algo
def _merge_sort(A, p, r):
    if p < r:
        q = (p + r) // 2
        _merge_sort(A, p, q)
        _merge_sort(A, q + 1, r)
        merge(A, p, q, r)
    return A

In [19]:
def merge_sort(A):
    """
    Ad hoc code.
    Algorithm must take only one input to work with benchmarking
    below.
    """
    p = 1
    r = len(A)
    _merge_sort(A, p, r)
    return A
        

In [20]:
a = [99, 33, -11, 45]

merge_sort(a)

[-11, 33, 45, 99]

### Benchmarking

In [25]:
benchmark_results_mergesort = pd.DataFrame(columns = 
                                          ['input ordering', 'input size',
                                           'run number','sorting algorithm',
                                           'time'])

In [26]:
# Source TP 2019

for input_ordering in ['sorted','reversed','random','identical']:
    
    for input_size in range(1,4):
        rng = np.random.default_rng(12)
        test_data = np.random.uniform(size=10**input_size)
        
        if input_ordering == 'sorted':
            test_data = sorted(test_data)
        
        elif input_ordering == 'reversed':
            test_data = list(reversed(sorted(test_data)))
        
        elif input_ordering == 'identical':
            test_data = [test_data]*0
            
        clock = timeit.Timer(stmt='sort_func(copy(data))',
                     globals={'sort_func': merge_sort,
                              'data': test_data,
                              'copy': copy.copy})
        n_ar, t_ar = clock.autorange()
        t = clock.repeat(repeat=5, number=n_ar)  
        
        print(f"mergesort minimum time on {input_ordering} \
              data of size 10^{input_size}:", min(t)/n_ar)
        
        
        for run_number in range(5):
            benchmark_results_mergesort = \
                                benchmark_results_mergesort.append(
                                {'input ordering': input_ordering,                        
                                 'input size':10**input_size,
                                  'run number':run_number+1,
                                  'sorting algorithm':'merge_sort',
                                  'time':t[run_number]/n_ar},
                                     ignore_index=True)

mergesort minimum time on sorted               data of size 10^1: 3.9032620000000404e-05
mergesort minimum time on sorted               data of size 10^2: 0.0005033344999998235
mergesort minimum time on sorted               data of size 10^3: 0.006388038000000052
mergesort minimum time on reversed               data of size 10^1: 3.976275999998507e-05
mergesort minimum time on reversed               data of size 10^2: 0.0005065806000000066
mergesort minimum time on reversed               data of size 10^3: 0.006403950000003533
mergesort minimum time on random               data of size 10^1: 4.8432999999999995e-05
mergesort minimum time on random               data of size 10^2: 0.0006393409999998312
mergesort minimum time on random               data of size 10^3: 0.008252345000005334
mergesort minimum time on identical               data of size 10^1: 5.703484999997955e-07
mergesort minimum time on identical               data of size 10^2: 5.728579999999966e-07
mergesort minimum tim

In [29]:
benchmark_results_mergesort.to_pickle(r"C:\Users\ander\Documents\Data-Science\INF221\term_paper\data\benchmark_results_mergesort.pkl")

In [31]:
pd.read_pickle(r"C:\Users\ander\Documents\Data-Science\INF221\term_paper\data\benchmark_results_mergesort.pkl")

Unnamed: 0,input ordering,input size,run number,sorting algorithm,time
0,sorted,10,1,merge_sort,4.001796e-05
1,sorted,10,2,merge_sort,3.903884e-05
2,sorted,10,3,merge_sort,3.903262e-05
3,sorted,10,4,merge_sort,3.904634e-05
4,sorted,10,5,merge_sort,4.744384e-05
5,sorted,100,1,merge_sort,0.0009570875
6,sorted,100,2,merge_sort,0.000563573
7,sorted,100,3,merge_sort,0.0005033345
8,sorted,100,4,merge_sort,0.00052718
9,sorted,100,5,merge_sort,0.000519679
