# Linear Time Selection

In [1]:
import random

def partition(arr, pivot_index, lower, higher):
    """Partition arr[lower..higher] in place around arr[pivot_index].

    The pivot is first parked at position ``higher``. Every element of the
    range that compares ``<=`` to the pivot is then moved to the front of
    the range, and the pivot is finally placed directly after them.

    Args:
        arr: mutable sequence; it is reordered in place.
        pivot_index: position of the element to use as the pivot.
        lower: first index of the range (inclusive).
        higher: last index of the range (inclusive).

    Returns:
        The index at which the pivot ends up.
    """
    # Move the pivot out of the way so the scan below never touches it.
    swap(arr, pivot_index, higher)
    pivot_value = arr[higher]

    # arr[lower:boundary] always holds the elements found to be <= pivot.
    boundary = lower
    for scan in range(lower, higher):
        if arr[scan] <= pivot_value:
            swap(arr, boundary, scan)
            boundary += 1

    # Drop the pivot between the "<=" section and the ">" section.
    swap(arr, boundary, higher)
    return boundary

def swap(arr, i, j):
    """Exchange the elements at positions i and j of arr, in place."""
    first, second = arr[i], arr[j]
    arr[i] = second
    arr[j] = first

## 1. Random Select

Worst case: $O(n^2)$  
Average case: $O(n)$ (in expectation over the random pivot choices)

In [2]:
def rselect(arr, index, lower, higher):
    """Return the element that belongs at position ``index`` once sorted.

    Repeatedly partitions the active range arr[lower..higher] around a
    uniformly random pivot and narrows the range to the side that contains
    ``index``, until the pivot lands exactly on ``index``.

    Args:
        arr: mutable sequence; it is reordered in place by the partitioning.
        index: absolute 0-based position in ``arr`` of the wanted element;
            expected to satisfy ``lower <= index <= higher``.
        lower: first index of the search range (inclusive).
        higher: last index of the search range (inclusive).

    Returns:
        The element that ends up at ``arr[index]``.
    """
    while True:
        candidate = random.randint(lower, higher)
        landed = partition(arr, candidate, lower, higher)
        if landed == index:
            return arr[landed]
        if landed > index:
            # Target lies left of the pivot.
            higher = landed - 1
        else:
            # Target lies right of the pivot.
            lower = landed + 1
    
# Demo: ask for 0-based position 1 over the whole list, i.e. the
# second-smallest value. Note that rselect reorders arr1 in place.
arr1 = [1, 6, 3, 9, 12, 8, 4, 2]
rselect(arr1, 1, 0, len(arr1) - 1)

2

## 2. Deterministic Select

"median of medians"

Worst case: $O(n)$  
But: Worse constant work and not in-place

```
DSelect(array A, length n, order statistic i)
  1. Break A into groups of 5, sort each group
  2. C = n/5 "middle elements"
  3. p = DSelect(C, n/5, n/10) [recursively compute median of C]
  4. Partition A around p with pivot point j
  5. If j = i return p
  6. If j < i return DSelect(2nd part of A, ...)
  7. If j > i return DSelect(1st part of A, ...)
```

In [3]:
def dselect(arr, index, lower, higher):
    """Return the element that belongs at position ``index`` once sorted,
    choosing pivots with the "median of medians" rule.

    The active range arr[lower..higher] is cut into groups of at most five
    elements; the median of the group medians is used as the pivot for
    ``partition``, and the search continues on the side containing
    ``index``.

    Args:
        arr: list to select from; it is reordered in place by the
            partitioning.
        index: absolute 0-based position in ``arr`` of the wanted element;
            expected to satisfy ``lower <= index <= higher``.
        lower: first index of the search range (inclusive, non-negative).
        higher: last index of the search range (inclusive).

    Returns:
        The element that ends up at ``arr[index]``.
    """
    if lower == higher:
        return arr[lower]

    # Groups of (up to) five consecutive elements of the active range; the
    # last group may be shorter. Slicing copies, so sorting a group does not
    # disturb arr.
    groups = [arr[i:min(i + 5, higher + 1)] for i in range(lower, higher + 1, 5)]
    # Lower median of each group.
    medians = [sorted(g)[(len(g) - 1) // 2] for g in groups]
    if len(medians) <= 2:
        # With one or two groups, any group median is an acceptable pivot.
        median_of_medians = medians[0]
    else:
        median_of_medians = dselect(
            medians, (len(medians) - 1) // 2, 0, len(medians) - 1)

    # Look the pivot up inside the active range only. An unbounded
    # arr.index() could return an equal value located before ``lower``;
    # partition would then swap an out-of-range element into the range and
    # the result would be wrong.
    pivot_index = arr.index(median_of_medians, lower, higher + 1)
    pivot = partition(arr, pivot_index, lower, higher)

    if pivot == index:
        return arr[pivot]
    if pivot > index:
        # Target lies left of the pivot.
        return dselect(arr, index, lower, pivot - 1)
    # Target lies right of the pivot.
    return dselect(arr, index, pivot + 1, higher)

# Demo: ask for 0-based position 1 over the whole list, i.e. the
# second-smallest value. Note that dselect reorders arr2 in place.
arr2 = [1, 6, 3, 9, 12, 8, 4, 2]
dselect(arr2, 1, 0, len(arr2) - 1)

2