## Difference between Lists and Arrays (implementation)

### Setup

* Set the recursion limit to `maxint`,  $2^{31} - 1$
  - This is the highest value Python allows
* Set up the `Timer` class to time executions

In [None]:
import sys
# Set the interpreter's recursion limit to 2**31 - 1 (0x7FFFFFFF).
sys.setrecursionlimit(0x7FFFFFFF)

In [None]:
import time

class TimeError(Exception):
  """Raised when a Timer method is called in a state that does not allow it."""

class Timer:
  """Simple stopwatch built on time.perf_counter().

  Typical use is start() ... stop(), then elapsed() or str(timer) to read the
  duration of the most recent start/stop interval.  The same instance can be
  started and stopped repeatedly.
  """

  def __init__(self):
    # None marks "not running" / "no measurement yet".  start(), stop() and
    # elapsed() all test against None, so the initial values must be None:
    # any other value makes a fresh timer look as if it were already running.
    self._start = None
    self._elapsed = None

  def start(self):
    """Begin a measurement.

    Raises:
      TimeError: if the timer is already running.
    """
    if self._start is not None:
      raise TimeError('Timer is running. Use .stop()')

    self._start = time.perf_counter()

  def stop(self):
    """End the current measurement and record its duration.

    Calling stop() on a timer that has never been started is a no-op, so
    code that calls stop() once before the first start() keeps working.

    Raises:
      TimeError: if the timer has completed a measurement and is not
        currently running.
    """
    if self._start is None:
      if self._elapsed is None:
        # Never started: nothing to stop and nothing to record.
        return
      raise TimeError('Timer is not running. Use .start()')

    self._elapsed = time.perf_counter() - self._start
    self._start = None

  def elapsed(self):
    """Return the duration of the most recent completed measurement.

    Raises:
      TimeError: if no measurement has been completed yet.
    """
    if self._elapsed is None:
      raise TimeError('Timer has not been run yet. Use .start()')

    return self._elapsed

  def __str__(self):
    """Return the last recorded duration as a string."""
    return str(self._elapsed)

#### Python lists

In [None]:
# Time 10,000,000 appends at the end of a Python list.
t = Timer()
# stop() once before the first start() so the timer is in its stopped state.
t.stop()
t.start()
l = []
for i in range(10000000):
  l.append(i)
t.stop()
# Printing the timer shows the duration recorded by the last stop().
print(t)

2.0300606969999535


In [None]:
# Time 100,000 insertions at the front (index 0) of a Python list.
t = Timer()
# stop() once before the first start() so the timer is in its stopped state.
t.stop()
t.start()
l = []
for i in range(100000):
  l.insert(0, i)
t.stop()
# Printing the timer shows the duration recorded by the last stop().
print(t)

2.7762632679999797


#### Searching
Naive search and Binary search with lists

In [None]:
def naive_search(v, L):
  """Linear scan: report whether any element of L compares equal to v."""
  return any(v == item for item in L)

In [None]:
def binary_search_list(v, L):
  """Binary search for v in the sorted list L using list slices.

  Each step keeps only the half of the list that can still contain v, by
  slicing it out into a new list.  Returns True if v is found, else False.
  """
  remaining = L
  while remaining != []:
    middle = len(remaining) // 2
    pivot = remaining[middle]
    if v == pivot:
      return True
    # Discard the pivot and the half that cannot contain v.
    remaining = remaining[:middle] if v < pivot else remaining[middle + 1:]
  return False

Naive search and Binary search with arrays (NumPy)

In [None]:
def naive_search(v, A, l=0, r=None):
  """Linear scan for v in A[l:r], visiting positions by index.

  Args:
    v: value to look for.
    A: indexable sequence (list or 1-D array).
    l: first index to examine (inclusive).  Defaults to 0.
    r: index at which to stop (exclusive).  Defaults to len(A).

  Returns:
    True if some A[i] with l <= i < r equals v, otherwise False.

  The bounds are optional so that the two-argument call naive_search(v, L)
  keeps working: this definition replaces the earlier two-argument function
  of the same name.
  """
  if r is None:
    r = len(A)
  for i in range(l, r):
    if v == A[i]:
      return True
  return False

In [None]:
def binary_search_array(v, A, l, r):
  """Binary search for v in the sorted range A[l:r] without copying.

  The search window is tracked by two indices: the lower bound is inclusive
  and the upper bound is exclusive.  Returns True if v is found, else False.
  """
  low, high = l, r
  while high - low > 0:
    middle = (low + high) // 2
    probe = A[middle]
    if v == probe:
      return True
    if v < probe:
      high = middle
    else:
      low = middle + 1
  return False

#### Performance comparison across $10^4$ worst-case searches in a sequence of size $10^5$
  * Looking for odd numbers in a sequence of even numbers

Naive search vs Binary search on lists

In [None]:
# Sorted list of the even numbers 0, 2, ..., 99998.
l = list(range(0, 100000, 2))
t = Timer()
# stop() once before the first start() so the timer is in its stopped state.
t.stop()
t.start()
# Every query is odd and the list holds only even numbers, so each search
# is a miss.
# NOTE(review): naive_search is defined twice in this file with different
# signatures; this call uses the two-argument (v, L) form.
for i in range(3001, 13000, 2):
  v = naive_search(i, l)
t.stop()
print()
print('Naive search', t)
# Same queries again, this time with the slice-based binary search.
t.start()
for i in range(3001, 13000, 2):
  v = binary_search_list(i, l)
t.stop()
print()
print('Binary search', t)


Naive search 10.552074291000054

Binary search 0.8519362889999229


Naive search vs Binary search on arrays

In [None]:
import numpy as np

# Sorted array of the even numbers 0, 2, ..., 99998.
my_array = np.arange(0, 100000, 2)
# Element count, computed once so it is not re-evaluated inside the timed loops.
n = my_array.size
# Odd values only, so every search is a miss.  Both searches use the same
# queries (the same range as the list benchmark) so their timings are
# comparable.
queries = range(3001, 13000, 2)

t = Timer()
# stop() once before the first start() so the timer is in its stopped state.
t.stop()
t.start()
for i in queries:
  v = naive_search(i, my_array, 0, n)
t.stop()
print()
print('Naive search', t)
t.start()
for i in queries:
  v = binary_search_array(i, my_array, 0, n)
t.stop()
print()
print('Binary search', t)


Naive search 18.30817441499994

Binary search 0.19423603599989292


Questions
* Binary search in arrays is much faster than in lists
* Why is naive search in arrays slower than in lists?

### Sorting
#### Selection sort

Selection sort on a list

In [None]:
def selection_sort_list(L):
  """Sort the list L in place in ascending order and return it.

  For each position in turn, the smallest element of the not-yet-sorted
  suffix is located and swapped into that position.
  """
  length = len(L)
  if length >= 1:
    for slot in range(length):
      smallest = slot
      candidate = slot + 1
      while candidate < length:
        if L[candidate] < L[smallest]:
          smallest = candidate
        candidate += 1
      L[slot], L[smallest] = L[smallest], L[slot]
  return L

Selection sort on an array

In [None]:
def selection_sort_array(A):
  """Sort the NumPy array A in place in ascending order and return it.

  For each position i, the smallest element of A[i:] is located and swapped
  into position i.

  Args:
    A: NumPy array; the element count is taken from A.shape.
      NOTE(review): the element-wise swap assumes A is one-dimensional.

  Returns:
    The same array object, now sorted.
  """
  n = np.prod(A.shape)
  if n < 1:
    return A

  for i in range(n):
    min_pos = i
    for j in range(i + 1, n):
      if A[j] < A[min_pos]:
        # Remember where the smallest element seen so far lives.
        min_pos = j
    A[i], A[min_pos] = A[min_pos], A[i]

  return A

Selection sort performance is more or less the same for all inputs

Selection sort performance on lists

In [None]:
import random

# Fixed seed so the 'random' input is the same on every run.
random.seed(2021)

# All three inputs have the same length so that their timings are comparable.
size = 100000
input_lists = {}
input_lists['random'] = [random.randrange(size) for _ in range(size)]
input_lists['ascending'] = list(range(size))
input_lists['descending'] = list(range(size - 1, -1, -1))

t = Timer()
# Same convention as the earlier timing cells: stop() once so the timer is in
# its stopped state before the first start().
t.stop()
for k, data in input_lists.items():
  # Sort a copy: selection_sort_list rearranges its argument in place.
  temp_list = data[:]
  t.start()
  selection_sort_list(temp_list)
  t.stop()
  print(k, t)

Selection sort performance on arrays

In [None]:
import numpy as np
import random

# Fixed seed so the 'random' input is the same on every run.
random.seed(2021)

# All three inputs are NumPy arrays of the same length: selection_sort_array
# reads A.shape, which a plain Python list does not have.
size = 100000
input_lists = {}
input_lists['random'] = np.array([random.randrange(size) for _ in range(size)])
input_lists['ascending'] = np.arange(size)
input_lists['descending'] = np.arange(size - 1, -1, -1)

t = Timer()
# Same convention as the earlier timing cells: stop() once so the timer is in
# its stopped state before the first start().
t.stop()
for k, data in input_lists.items():
  # Slicing an ndarray with [:] gives a view onto the same data, so take a
  # real copy before sorting in place.
  temp_list = data.copy()
  t.start()
  selection_sort_array(temp_list)
  t.stop()
  print(k, t)

_Question:_ Why is selection sort slower on arrays than on lists?