# Exercise 1.

Although merge sort has a better Big-O than selection sort, selection sort can be faster for smaller inputs.

Rewrite `merge_sort(A, min_size)` such that sub-arrays smaller than an input parameter `min_size` are sorted with our `selection_sort` from the lecture `algorithms intro`.

Time the difference between pure merge sort and this new algorithm. Is it faster? Why or why not?

In [10]:
import random
import time


# Taken from class notes:
def linear_search(arr):
    """Return the index of the smallest element of ``arr`` (an argmin).

    The running best starts at +infinity so that any smaller element
    replaces it; on ties the earliest index wins.  When no element is
    smaller than +infinity (for example when ``arr`` is empty) the
    result is 0.
    """
    best_idx = 0
    best_val = float('inf')
    for idx, val in enumerate(arr):
        if val < best_val:
            best_idx, best_val = idx, val
    return best_idx

# taken from class notes:
def selection_sort(arr):
    """Sort ``arr`` in place, in ascending order, using selection sort.

    On each pass the smallest element of the unsorted tail is located and
    swapped into the first unsorted position.  The tail is scanned by
    index, so no slice copy is made per pass.

    Args:
        arr: a mutable sequence of mutually comparable items.

    Returns:
        The same ``arr`` object, now sorted, so the call can also be used
        as an expression (e.g. ``return selection_sort(chunk)``).
    """
    size = len(arr)
    for n_sorted in range(size):
        # Find the index of the minimum among the remaining elements.
        min_idx = n_sorted
        for candidate in range(n_sorted + 1, size):
            if arr[candidate] < arr[min_idx]:
                min_idx = candidate
        # Swap the minimum with the leftmost remaining element.
        arr[n_sorted], arr[min_idx] = arr[min_idx], arr[n_sorted]
    return arr

def merge(left, right):
    """Merge two ascending-sorted lists into one new ascending-sorted list.

    The inputs are walked with two cursors instead of ``pop(0)``, so each
    step is O(1) and neither ``left`` nor ``right`` is modified.

    When the two current heads compare equal, the element from ``right``
    is emitted first (the comparison is a strict ``<``).

    Args:
        left: list sorted in ascending order.
        right: list sorted in ascending order.

    Returns:
        A new list holding every element of both inputs in ascending order.
    """
    res = []
    i = j = 0
    n_left, n_right = len(left), len(right)
    # Zip together the left and right parts.
    while i < n_left and j < n_right:
        if left[i] < right[j]:
            res.append(left[i])
            i += 1
        else:
            res.append(right[j])
            j += 1
    # Copy in whatever remains of either side (at most one is non-empty).
    res.extend(left[i:])
    res.extend(right[j:])
    return res
    
# Class m_s
def merge_sort(A):
    """Return the elements of ``A`` sorted in ascending order by merge sort.

    Inputs with at most one element are returned as-is (the same object);
    longer inputs are split at the midpoint, each half is sorted
    recursively, and the two sorted halves are combined with ``merge``.
    """
    if len(A) <= 1:
        return A
    mid = len(A) // 2
    # The back half is sorted first and passed as merge's first argument.
    back_sorted = merge_sort(A[mid:])
    front_sorted = merge_sort(A[:mid])
    return merge(back_sorted, front_sorted)
    
# Edited from class notes
def merge_sort_edit(A, min_size):
    """Merge sort that hands small sub-arrays to ``selection_sort``.

    Any (sub-)array shorter than ``min_size`` is sorted with selection
    sort instead of being split further.  The cutoff is applied at every
    level of the recursion, not only to the top-level input.

    Args:
        A: list of mutually comparable items.
        min_size: sub-arrays with fewer than this many elements are sorted
            with ``selection_sort``.

    Returns:
        The elements of ``A`` in ascending order.  Inputs with at most one
        element are returned as-is; otherwise a new list is returned and
        ``A`` is left untouched.
    """
    size = len(A)
    if size <= 1:
        return A
    if size < min_size:
        # selection_sort works in place, so sort a copy and return that
        # copy explicitly rather than relying on its return value.
        small = list(A)
        selection_sort(small)
        return small
    m = size // 2
    # Recurse with the cutoff so it also applies to the sub-arrays.
    back_sorted = merge_sort_edit(A[m:], min_size)
    front_sorted = merge_sort_edit(A[:m], min_size)
    return merge(back_sorted, front_sorted)

# Random array generator without numpy
arr = [random.random() for _ in range(100)]
min_size = 13

# time.perf_counter() is the high-resolution clock meant for measuring
# short intervals.  Each banner is printed before its clock starts so
# that the print call is not part of the measured time.
print("Starting regular merge_sort")
start = time.perf_counter()
merge_sort(arr)
end = time.perf_counter()
print(end - start)
print("Starting min-size merge_sort")
startm_s = time.perf_counter()
merge_sort_edit(arr, min_size)
endm_s = time.perf_counter()
print(endm_s - startm_s)

"""

We can see that the min-size merge sort (merge_sort_edit) is quicker, and it seems to get quicker the higher min_size is.
We could theorize that it gets quicker as min_size increases because the recursion finishes
sooner than in the regular merge_sort, thus adding less overhead to the entire operation.
"""

Starting regular merge_sort
0.0006668567657470703
Starting min-size merge_sort
0.00039696693420410156


# Exercise 2. 

Let $A[1...n]$ be an array of $n$ distinct numbers. If $i < j$ and $A[i] > A[j]$, then the pair $(i, j)$ is called an **inversion** of $A$. 

In other words an inversion is a pair of unsorted elements in an array.

**1)** List the five inversions of $[2, 3, 8, 6, 1]$ 

**2)** Give an algorithm that determines the number of inversions in any permutation on $n$ elements in $O(n \log_2 n)$ worst-case time. (Hint: Modify merge sort.)

In [57]:

"""
We swap the left-right check
"""

def merge_d(left, right):
    """Merge two descending-sorted lists into one new descending-sorted list.

    Mirror image of the ascending merge: the larger head is emitted first.
    The inputs are walked with two cursors instead of ``pop(0)``, so each
    step is O(1) and neither ``left`` nor ``right`` is modified.

    When the two current heads compare equal, the element from ``right``
    is emitted first (the comparison is a strict ``>``).

    Args:
        left: list sorted in descending order.
        right: list sorted in descending order.

    Returns:
        A new list holding every element of both inputs in descending order.
    """
    res = []
    i = j = 0
    n_left, n_right = len(left), len(right)
    while i < n_left and j < n_right:
        if left[i] > right[j]:
            res.append(left[i])
            i += 1
        else:
            res.append(right[j])
            j += 1
    # Copy in whatever remains of either side (at most one is non-empty).
    res.extend(left[i:])
    res.extend(right[j:])
    return res
    
# Class m_s
def merge_sort_d(A):
    """Return the elements of ``A`` sorted in descending order by merge sort.

    Inputs with at most one element are returned as-is (the same object);
    longer inputs are split at the midpoint, each half is sorted
    recursively, and the sorted halves are combined with ``merge_d``.
    """
    if len(A) <= 1:
        return A
    mid = len(A) // 2
    # The back half is sorted first and passed as merge_d's first argument.
    back_sorted = merge_sort_d(A[mid:])
    front_sorted = merge_sort_d(A[:mid])
    return merge_d(back_sorted, front_sorted)



def inversions(arr):
    """List every inversion of ``arr`` as a pair of values.

    A pair is reported when an element appears before a strictly smaller
    one.  Note that each pair holds the two *values*
    ``(earlier_value, later_value)``, not their indices.

    Every pair of positions is examined, so the running time is quadratic
    in ``len(arr)``.

    Args:
        arr: sequence of mutually comparable items.

    Returns:
        List of ``(earlier_value, later_value)`` tuples, ordered by the
        position of the earlier element and then of the later element.
    """
    from itertools import combinations

    # combinations(arr, 2) yields each pair once with the earlier element
    # first, in the same order as a nested ``for i`` / ``for j > i`` loop.
    return [
        (earlier, later)
        for earlier, later in combinations(arr, 2)
        if earlier > later
    ]
    
"""
To satisfy the condition 𝑖<𝑗 and 𝐴[𝑖]>𝐴[𝑗] for every pair,
we technically study the inversions of the descending-sorted
version of the array,
and apply the inversions method to that array.
"""
def inversion_perms(arr):
    """Return the inversions of the descending-sorted version of ``arr``.

    ``arr`` is first sorted in descending order with ``merge_sort_d`` and
    the result is passed to ``inversions``.  For distinct elements every
    pair of a descending array is an inversion, so the list then contains
    all n*(n-1)/2 value pairs.
    """
    return inversions(merge_sort_d(arr))

def count_of_possible_inversions(arr):
    """Return a sentence reporting how many pairs ``inversion_perms`` finds.

    The count is the length of the list returned by
    ``inversion_perms(arr)``; the result is a formatted string, not a
    number.
    """
    template = "There are {length} possible inversions for all permutations of arr "
    n_pairs = len(inversion_perms(arr))
    return template.format(length=n_pairs)
    
    
# Demo on the array from part 1 of the exercise.
arr = [2, 3, 8, 6, 1]
# Part 1: the inversions of the array itself.
print('1:', inversions(arr), sep='\n')
# Part 2: the summary sentence, then the pairs it counted.
print('2:', count_of_possible_inversions(arr), inversion_perms(arr), sep='\n')


1:
[(2, 1), (3, 1), (8, 6), (8, 1), (6, 1)]
2:
There are 10 possible inversions for all permutations of arr 
[(8, 6), (8, 3), (8, 2), (8, 1), (6, 3), (6, 2), (6, 1), (3, 2), (3, 1), (2, 1)]


# 3. Recursive sum

Write a function that uses recursion to compute the sum of an array or list of numbers

```
recursive_sum([2, 4, 5, 6, 7])

output: 24
```

In [13]:
def recursive_sum(arr, at=0):
    """Recursively sum the elements of ``arr`` from index ``at`` onward.

    Args:
        arr: list (or other indexable sequence) of numbers.
        at: index of the first element to include; defaults to the start.

    Returns:
        ``arr[at] + arr[at + 1] + ...``, or 0 when ``at`` is at or past
        the end of ``arr`` (which makes the sum of an empty list 0).
    """
    # Base case: nothing left to add.  Checking before indexing keeps the
    # recursion from reading one element past the end.
    if at >= len(arr):
        return 0
    return arr[at] + recursive_sum(arr, at + 1)


recursive_sum([2, 4, 5, 6, 7])

24

# 4. Recursive denominators

Write a Python program that uses recursion to find the greatest common divisor (gcd) of two integers.

```
recursive_gcd(12,14)

output : 2
```

In [15]:
def recursive_gcd(int1, int2):
    """Return the greatest common divisor of two integers, recursively.

    Uses Euclid's algorithm: gcd(a, b) == gcd(b, a % b), with gcd(a, 0)
    being a.  The remainder shrinks quickly, so the recursion stays
    shallow even for large inputs, and the smaller number itself is
    correctly reported when it divides the larger one.

    Args:
        int1: an integer (its sign is ignored).
        int2: an integer (its sign is ignored).

    Returns:
        The largest non-negative integer dividing both arguments;
        ``recursive_gcd(0, 0)`` is 0.
    """
    int1, int2 = abs(int1), abs(int2)
    # Base case: everything divides 0, so the other number is the gcd.
    if int2 == 0:
        return int1
    return recursive_gcd(int2, int1 % int2)


recursive_gcd(12,14)

2

# 5. Recursive power function

Write a recursive function to calculate the value of 'a' to the power 'b'. 

```
recursive_pow(3, 4)

output: 81
```

In [26]:
def recursive_pow(x, y, at=0):
    """Recursively compute ``x`` raised to the integer power ``y``.

    A negative power is handled through x^-y = 1 / (x^y).

    Args:
        x: the base.
        y: the exponent (an integer; may be negative or zero).
        at: number of factors already accounted for; callers normally
            leave it at 0.  With ``0 <= at <= y`` the result is
            ``x ** (y - at)``.

    Returns:
        ``x ** y`` for the default ``at``; in particular 1 when ``y`` is 0.
        The recursion uses one frame per factor, so very large exponents
        can exceed Python's recursion limit.
    """
    if y < 0:
        return 1 / recursive_pow(x, abs(y))
    # Base case: no factors left to multiply (covers y == 0).
    if at >= y:
        return 1
    return x * recursive_pow(x, y, at + 1)


recursive_pow(3, 4)

81

# 6. (Stretch) K-Nearest Neighbours

Consider a matrix with the following format:

```
[[0.3, 0.8],
 [-0.2, 0.5],
 [1, -1],
 [0.9, 0.5]
]
```

Each row denotes a point, and the numbers in each row are the coordinates. The coordinates in this example are in 2d, but the matrix could be in 3d (3 numbers per row) or even higher dimensions.

Your task is to write a function `knn(m, p, k)` or `k_nearest_neighbors(m, p, k)` which takes in a matrix of points `m`, an integer `p` denoting the index of a point in that matrix, and an integer `k` denoting the number of nearest neighbors to return.

The function returns the index of the `k` nearest neighbors of the point `p` in the matrix `m`.

```
dataset = [[2.7810836,2.550537003,0],
	[1.465489372,2.362125076,0],
	[3.396561688,4.400293529,0],
	[1.38807019,1.850220317,0],
	[3.06407232,3.005305973,0],
	[7.627531214,2.759262235,1],
	[5.332441248,2.088626775,1],
	[6.922596716,1.77106367,1],
	[8.675418651,-0.242068655,1],
	[7.673756466,3.508563011,1]]

knn(dataset, 0, 2)

output : [4, 1]
```

You can use `from sklearn.neighbors import NearestNeighbors` to test your function

In [47]:
import math

"""
Copied but edited from class notes to work with 
the following tuple:

(index, distance to point)

"""
def merge_tuples(left, right):
    """Merge two lists of ``(index, distance)`` tuples sorted by distance.

    Only element ``[1]`` of each tuple (the distance) is compared.  The
    inputs are walked with two cursors instead of ``pop(0)``, so each step
    is O(1) and neither ``left`` nor ``right`` is modified.

    When the two current heads have equal distances, the tuple from
    ``right`` is emitted first (the comparison is a strict ``<``).

    Args:
        left: list of tuples in ascending order of their second item.
        right: list of tuples in ascending order of their second item.

    Returns:
        A new list with every tuple of both inputs, ascending by distance.
    """
    res = []
    i = j = 0
    n_left, n_right = len(left), len(right)
    # Zip together the left and right parts.
    while i < n_left and j < n_right:
        if left[i][1] < right[j][1]:
            res.append(left[i])
            i += 1
        else:
            res.append(right[j])
            j += 1
    # Copy in whatever remains of either side (at most one is non-empty).
    res.extend(left[i:])
    res.extend(right[j:])
    return res
    
# Class m_s
def merge_sort_tuples(A):
    """Merge-sort a list of ``(index, distance)`` tuples via ``merge_tuples``.

    Inputs with at most one element are returned as-is (the same object);
    longer inputs are split at the midpoint, each half is sorted
    recursively, and the sorted halves are combined with ``merge_tuples``.
    """
    if len(A) <= 1:
        return A
    mid = len(A) // 2
    # The back half is sorted first and passed as the first argument.
    back_sorted = merge_sort_tuples(A[mid:])
    front_sorted = merge_sort_tuples(A[:mid])
    return merge_tuples(back_sorted, front_sorted)



"""
Based on the formula for the distance between two points in n dimensions

dist = sqrt((x2 - x1)^2 + (y2 - y1)^2 + ... + (n2 - n1)^2)

we can say dist = sqrt(A)

where A = (x2 - x1)^2 + (y2 - y1)^2 + ... + (n2 - n1)^2
"""

"""
To calculate A above, we use a recursive approach
like in the recursive_sum function earlier

This allows us to handle any dimension, instead of hardcoding
for a specific dimension
"""
def sum_difference_squares(a, b, at=0):
    """Recursively sum the squared coordinate differences of two points.

    Args:
        a: coordinates of the first point.
        b: coordinates of the second point; must have at least as many
            entries as ``a``, since the length of ``a`` drives the loop.
        at: index of the first coordinate to include; callers normally
            leave it at 0.

    Returns:
        ``(b[at] - a[at])**2 + (b[at+1] - a[at+1])**2 + ...`` as a float,
        or 0.0 when ``at`` is at or past the end of ``a`` (so two empty
        points give 0.0).
    """
    # Base case: no coordinates left.  The float zero keeps the result a
    # float even when every coordinate is an int.
    if at >= len(a):
        return 0.0
    return pow(b[at] - a[at], 2) + sum_difference_squares(a, b, at + 1)

def distance(a, b, at=0):
    """Return the Euclidean distance between points ``a`` and ``b``.

    The squared differences are summed by ``sum_difference_squares`` and
    the square root of that sum is returned.  The ``at`` parameter is
    accepted but not used.
    """
    return math.sqrt(sum_difference_squares(a, b))



def knn(m, p, k):
    """Return the indices of the ``k`` rows of ``m`` nearest to row ``p``.

    Distances are computed with ``distance`` over every number in a row,
    and the candidates are ranked with ``merge_sort_tuples``.

    Args:
        m: matrix of points, one row per point.
        p: index of the query point in ``m`` (expected to be a
            non-negative index).
        k: number of neighbours to return.

    Returns:
        A list of row indices ordered from nearest to farthest.  It never
        contains ``p`` itself, and it holds fewer than ``k`` entries when
        ``m`` has fewer than ``k`` other rows.  A non-positive ``k``
        yields an empty list.
    """
    point = m[p]
    if k <= 0:
        return []

    # Visit the other rows inside-out from p: first p-1 down to 0, then
    # p+1 up to the end.  A plain loop avoids one recursion frame per row,
    # and keeping this visiting order keeps the ranking of equidistant
    # rows unchanged.
    visit_order = list(range(p - 1, -1, -1)) + list(range(p + 1, len(m)))

    # (index, distance to point) tuples, so the index travels with its
    # distance through the sort.
    distances = [(idx, distance(point, m[idx])) for idx in visit_order]

    ranked = merge_sort_tuples(distances)
    # Slicing copes with k larger than the number of neighbours, so the
    # result is always a list of indices.
    return [idx for idx, _ in ranked[:k]]


# Sample matrix from the exercise statement: ten rows of three numbers.
dataset = [
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1],
]

# Three nearest neighbours of the row at index 3.
knn(dataset, 3, 3)

[1, 0, 4]