# Implementation of Searching & Sorting algorithms

## Setup

* Set up the `Timer` class to time execution

In [1]:
import time

class TimerError(Exception):
  """A custom exception used to report errors in use of Timer class"""

class Timer:
  def __init__(self):
    self._start_time = None
    self._elapsed_time = None
  
  def start(self):
    """Start a new timer"""
    if self._start_time is not None:
      raise TimerError('Timer is running. Use .stop()')
    
    self._start_time = time.perf_counter()
  
  def stop(self):
    """Save the elapsed time and re-initialize the timer"""
    if self._start_time is None:
      raise TimerError('Timer is not running. Use .start()')
    
    self._elapsed_time = time.perf_counter() - self._start_time
    self._start_time = None
  
  def elapsed(self):
    """Report the elapsed time"""
    if self._elapsed_time is None:
      raise TimerError('Timer has not been run yet. Use .start()')
    
    return self._elapsed_time
  
  def __str__(self):
    """print() prints the elapsed"""
    return str(self._elapsed_time)

#### Naive search by scanning the list

In [2]:
def naive_search(v, l):
  for x in l:
    if x == v:
      return True
  
  return False

#### Binary search

In [3]:
def binary_search(v, l):
  if l == []:
    return False
  
  mid = len(l) // 2
  if v == l[mid]:
    return True

  if v < l[mid]:
    return binary_search(v, l[:mid])
  else:
    return binary_search(v, l[mid + 1:])

#### Checking correctness on input [0, 2, ..., 50]

In [5]:
l = list(range(0, 51, 2))

for i in range(51):
  print(f'[{i}, {naive_search(i, l)}]', end=', ')
print()

for i in range(51):
  print(f'[{i}, {binary_search(i, l)}]', end=', ')
print()

[0, True], [1, False], [2, True], [3, False], [4, True], [5, False], [6, True], [7, False], [8, True], [9, False], [10, True], [11, False], [12, True], [13, False], [14, True], [15, False], [16, True], [17, False], [18, True], [19, False], [20, True], [21, False], [22, True], [23, False], [24, True], [25, False], [26, True], [27, False], [28, True], [29, False], [30, True], [31, False], [32, True], [33, False], [34, True], [35, False], [36, True], [37, False], [38, True], [39, False], [40, True], [41, False], [42, True], [43, False], [44, True], [45, False], [46, True], [47, False], [48, True], [49, False], [50, True], 
[0, True], [1, False], [2, True], [3, False], [4, True], [5, False], [6, True], [7, False], [8, True], [9, False], [10, True], [11, False], [12, True], [13, False], [14, True], [15, False], [16, True], [17, False], [18, True], [19, False], [20, True], [21, False], [22, True], [23, False], [24, True], [25, False], [26, True], [27, False], [28, True], [29, False], [30, Tr

### Performance comparison across $10^{4}$ worst case searches in a list of size $10^{5}$

* Looking for odd numbers in a list of even numbers

In [7]:
l = list(range(0, 100000, 2))
t = Timer()

t.start()
for i in range(3001, 13000, 2):
  v = naive_search(i, l)
t.stop()

print()
print('Naive Search', t)

t.start()
for i in range(3001, 13000, 2):
  v = binary_search(i, l)
t.stop()

print()
print('Binary search', t)


Naive Search 10.719358297000042

Binary search 0.8333934999999997


### Selection sort

In [8]:
def selection_sort(L):
  n = len(L)
  if n < 1:
    return L
  
  for i in range(n):
    min_index = i

    for j in range(i + 1, n):
      if L[j] < L[min_index]:
        min_index = j
    
    L[i], L[min_index] = L[min_index], L[i]
  
  return L

#### Selection sort performance is more or less the same for all inputs

In [10]:
import random
random.seed(2021)

input_lists = {}
input_lists['random'] = [random.randrange(100000) for i in range(5000)]
input_lists['ascending'] = [i for i in range(5000)]
input_lists['descending'] = [i for i in range(4999, -1, -1)]

t = Timer()
for k in input_lists.keys():
  temp_list = input_lists[k][:]
  t.start()
  selection_sort(temp_list)
  t.stop()
  print(k, t)

random 1.250216086000023
ascending 1.2441596080000181
descending 1.320314938000024


#### Insertion sort, iterative

In [12]:
def insertion_sort(L):
  n = len(L)
  if n < 1:
    return L
  
  for i in range(n):
    j = i
    while j > 0 and L[j] < L[j - 1]:
      L[j], L[j - 1] = L[j - 1], L[j]
      j = j - 1
  
  return L

#### Insertion sort performance

* On already sorted input, performance is very good
* On reverse sorted input, performance is worse than selection sort

In [13]:
import random
random.seed(2021)

input_lists = {}
input_lists['random'] = [random.randrange(100000) for i in range(5000)]
input_lists['ascending'] = [i for i in range(5000)]
input_lists['descending'] = [i for i in range(4999, -1, -1)]

t = Timer()
for k in input_lists.keys():
  temp_list = input_lists[k][:]
  t.start()
  insertion_sort(temp_list)
  t.stop()
  print(k, t)

random 2.379664321000064
ascending 0.0008934370000588387
descending 4.673232396000003


### Insertion sort, recursive

In [14]:
def insert(L, v):
  n = len(L)
  if n == 0:
    return [v]
  
  if v >= L[-1]:
    return L + [v]
  else:
    return insert(L[:-1], v) + L[-1:]


def ins_sort(L):
  n = len(L)
  if n < 1:
    return L
  
  return insert(ins_sort(L[:-1]), L[-1])

In [15]:
import random
random.seed(2021)

input_lists = {}
input_lists['random'] = [random.randrange(100000) for i in range(5000)]
input_lists['ascending'] = [i for i in range(5000)]
input_lists['descending'] = [i for i in range(4999, -1, -1)]

t = Timer()
for k in input_lists.keys():
  temp_list = input_lists[k][:]
  t.start()
  ins_sort(temp_list)
  t.stop()
  print(k, t)

RecursionError: ignored

### Setup
> To overcome the default maximum recursion depth of Python (which is usually set to 1000)

* Set the recursion limit to maxint, i.e. $2^{31} - 1$
  - The highest value that Python allows

In [16]:
import sys
sys.setrecursionlimit(2 ** 31 - 1)

#### Recursive insertion sort is slower than the iterative version

* Input of 2000 (40%) takes more time than 5000 for iterative
  - Overhead of recursive calls
* Performance pattern between unsorted, sorted and random is similar

In [18]:
import random
random.seed(2021)

input_lists = {}
input_lists['random'] = [random.randrange(100000) for i in range(2000)]
input_lists['ascending'] = [i for i in range(2000)]
input_lists['descending'] = [i for i in range(1999, -1, -1)]

t = Timer()
for k in input_lists.keys():
  temp_list = input_lists[k][:]
  t.start()
  ins_sort(temp_list)
  t.stop()
  print(k, t)

random 12.180662461999873
ascending 0.029959382000015466
descending 21.513495066999894


### Merge sort

In [19]:
def merge(A, B):
  m, n = len(A), len(B)
  C, i, j, k = [], 0, 0, 0

  while k < m + n:
    if i == m:
      C.extend(B[j:])
      k = k + (n - j)
    elif j == n:
      C.extend(A[i:])
      k = k + (n - i)
    elif A[i] < B[j]:
      C.append(A[i])
      i, k = i + 1, k + 1
    else:
      C.append(B[j])
      j, k = j + 1, k + 1
  
  return C

In [20]:
def merge_sort(A):
  n = len(A)
  if n <= 1:
    return A
  
  L = merge_sort(A[:n // 2])
  R = merge_sort(A[n // 2:])

  return merge(L, R)

#### A simple input to check correctness

In [21]:
merge_sort([i for i in range(0, 1000, 2)] + [j for j in range(1, 1000, 2)])

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


#### Performance on large inputs, $10^{6}$, random and sorted

In [22]:
import random
random.seed(2021)

input_lists = {}
input_lists['random'] = [random.randrange(100000000) for i in range(1000000)]
input_lists['ascending'] = [i for i in range(1000000)]
input_lists['descending'] = [i for i in range(999999, -1, -1)]

t = Timer()
for k in input_lists.keys():
  temp_list = input_lists[k][:]
  t.start()
  merge_sort(temp_list)
  t.stop()
  print(k, t)

random 9.493494636999912
ascending 5.07402918899993
descending 5.157073330000003
