# Arrays

In [4]:
from math import floor
from random import randint

## Get $min$ and $max$

Uses only about $\frac{3}{2}N$ comparisons (three per pair of elements) instead of the naive $2N$.

In [5]:
def fast_min_max(T):
    """Return ``(minimum, maximum)`` of the sequence ``T``.

    Elements are consumed in adjacent pairs: the two members of a pair are
    first compared with each other, then only the smaller one is compared
    against the running minimum and only the larger one against the running
    maximum. That is three comparisons per two elements.

    For an empty sequence the result is ``(inf, -inf)``.
    """
    size = len(T)
    if size % 2:
        # Odd length: the first element seeds both extremes, the rest pair up.
        lo = hi = T[0]
        start = 1
    else:
        lo, hi = float("inf"), float("-inf")
        start = 0

    for k in range(start, size - 1, 2):
        small, big = T[k], T[k + 1]
        if small > big:
            small, big = big, small
        lo = min(lo, small)
        hi = max(hi, big)
    return lo, hi

Test:

In [6]:
# Randomised check: fast_min_max must agree with the built-in min/max.
# Any input on which they disagree is printed.
for _ in range(1000):
    T = [randint(-10, 11) for _ in range(randint(1, 100))]
    if fast_min_max(T) != (min(T), max(T)):
        print(T)

## Mode 
(pol. dominanta)

### $O(n^2)$

### $O(n\log{n})$

Using sorting.

In [7]:
def mode(T):
    """Return all most frequent values of ``T`` in ascending order.

    The input is sorted (into a copy), then scanned once as a sequence of
    runs of equal values. Returns ``None`` when ``T`` is empty.
    """
    if not T:
        return None

    ordered = sorted(T)
    size = len(ordered)

    best_len = 1
    winners = []
    run_start = 0
    # `pos` walks one step past the end so the final run is closed by the
    # same code path as every other run.
    for pos in range(1, size + 1):
        if pos < size and ordered[pos] == ordered[run_start]:
            continue  # still inside the current run

        run_len = pos - run_start
        if run_len > best_len:
            best_len = run_len
            winners = [ordered[run_start]]
        elif run_len == best_len:
            winners.append(ordered[run_start])
        run_start = pos

    return winners

### $O(n + m)$

$n$ - number of elements in the input array  
$m$ - number of integer values in the range $[\min{T}, \max{T}]$

Create a counter for each value in $[\min{T}, \max{T}]$.

In [8]:
def mode2(T):
    """Return all most frequent values of the integer sequence ``T``.

    Counting approach: one counter per integer in ``[min(T), max(T)]``, so
    the cost is proportional to ``len(T)`` plus the width of that range.
    The result is in ascending order.

    Returns ``None`` for an empty input, matching ``mode``.
    """
    if not T:
        # Without this guard fast_min_max yields (inf, -inf), which cannot
        # be used to size the counter table.
        return None

    min_, max_ = fast_min_max(T)
    cnts = [0] * (max_ - min_ + 1)
    for e in T:
        cnts[e - min_] += 1  # shift so that min_ maps to slot 0

    max_cnt = max(cnts)
    return [i + min_ for i, cnt in enumerate(cnts) if cnt == max_cnt]



Test:

In [9]:
# Randomised check: the counting-based mode2 must report the same set of
# modes as the sorting-based mode. Inputs on which they differ are printed.
for _ in range(100):
    T = [randint(-20, 20) for _ in range(randint(1, 1000))]
    if sorted(mode(T)) != sorted(mode2(T)):
        print(T)

## Leader

Finding an element which occurs more than $\frac{n}{2}$ times.

### $O(n)$

In [10]:
def leader(T):
    """Return the element occurring more than ``len(T) / 2`` times, or None.

    Two linear passes (Boyer-Moore majority vote):

    1. Voting pass: keep one candidate and a vote balance. A matching
       element adds a vote, a different one removes a vote, and when the
       balance is zero the current element becomes the new candidate.
       If a leader exists it is the candidate left at the end.
    2. Verification pass: count the candidate's real occurrences, because
       the voting pass also produces a candidate when there is no leader.

    Returns ``None`` when ``T`` is empty or has no leader.
    """
    if not T:
        return None

    candidate, votes = None, 0
    for w in T:
        if votes == 0:
            candidate, votes = w, 1
        elif w == candidate:
            votes += 1
        else:
            votes -= 1

    occurrences = sum(1 for w in T if w == candidate)
    return candidate if occurrences > len(T) / 2 else None


## Binary search $O(\log{n})$ 

In [11]:
def binary_search(T, x):
    """Return an index ``i`` with ``T[i] == x`` in the sorted sequence ``T``.

    Returns ``-1`` when ``x`` is not present (including for an empty ``T``).
    When ``x`` occurs several times, any one of its indices may be returned.

    The search window is the half-open range ``[l, r)``. The probe index is
    recomputed from the *current* bounds on every iteration, so it always
    lies inside the window and the window shrinks on every step.
    """
    l, r = 0, len(T)
    while l < r:
        c = l + (r - l) // 2
        if T[c] == x:
            return c
        if T[c] < x:
            l = c + 1  # x can only be to the right of c
        else:
            r = c      # x can only be to the left of c
    return -1

## Interpolation search $O(\log{\log{n}})$ 

Assume we have a sorted array of approximately uniformly distributed numbers from the range $[L, R]$, stored at indices $i_L \ldots i_R$. We want to find the index of a value $X \in [L, R]$.

We can use the following formula:

$$
i_X = i_L + \lfloor \frac{X - L}{R - L}(i_R - i_L) \rfloor
$$

In [12]:
def interpolation(T, il, ir, x):
    """Estimate the index of ``x`` inside ``T[il..ir]`` by linear interpolation.

    The position is chosen proportionally to where ``x`` lies between the
    boundary values ``T[il]`` and ``T[ir]``, rounded down. When both boundary
    values are equal the proportion is undefined and ``il`` is returned.
    """
    low, high = T[il], T[ir]
    spread = high - low
    if spread == 0:  # every value in the window is the same
        return il
    fraction = (x - low) / spread
    return il + floor(fraction * (ir - il))

def interpolation_search(T, x):
    """Return an index ``i`` with ``T[i] == x`` in the sorted sequence ``T``.

    Returns ``-1`` when ``x`` is not present, including for an empty ``T``.

    Works like binary search, except that the probe position inside the
    current window ``[il, ir]`` is estimated by ``interpolation`` from the
    boundary values instead of always being the midpoint.
    """
    if not T:
        return -1

    il, ir = 0, len(T) - 1
    # x can only be present while it lies between the window's boundary
    # values; `il <= ir` additionally guards against an emptied window.
    while il <= ir and T[il] <= x <= T[ir]:
        ix = interpolation(T, il, ir, x)
        if T[ix] == x:
            return ix
        if T[ix] < x:
            il = ix + 1
        else:
            ir = ix - 1
    return -1

Test:

In [13]:
# Randomised check: both searches must agree with the `in` operator on
# whether a value is present in a sorted array.
for _ in range(100):
    T = sorted([randint(-8, 8) for _ in range(randint(1, 100))])
    for i in range(-20, 120):
        c = i in T
        try:
            ia = interpolation_search(T, i)
            ib = binary_search(T, i)
        except Exception as e:
            print(e, c, i, T)
            continue
        # A result of -1 means "not found"; it must not be used as an index,
        # because T[-1] would silently read the last element.
        a = ia != -1 and T[ia] == i
        b = ib != -1 and T[ib] == i
        # `a != b != c` would only test (a != b) and (b != c); what has to
        # hold is that all three answers are equal.
        if not (a == b == c):
            print(a, b, c, i, T)

## The k-th largest element

### $O(n\log{n})$
Using sorting.

### $O(nk)$

In [14]:
def kth_largest(T, k):
    """Return the ``k``-th largest element of ``T`` (``k`` = 1 is the maximum).

    Keeps the ``k`` largest values seen so far in ``M``, ordered from largest
    to smallest, and inserts every element of ``T`` into it. Each insertion
    walks all ``k`` slots.

    ``T`` may be in any order and is not modified. If ``k`` is greater than
    ``len(T)`` the unfilled slots keep their initial value and ``-inf`` is
    returned. ``k`` is expected to be at least 1.
    """
    M = [float("-inf") for _ in range(k)]
    for e in T:
        # Walk every slot: slots holding a larger value are skipped; at the
        # first slot that is not larger, `e` is stored and the displaced
        # value is carried on, shifting the rest of M down by one. The
        # smallest value falls off the end.
        for i in range(k):
            if M[i] <= e:
                e, M[i] = M[i], e
    return M[k - 1]

### $O(n + m)$

$n$ - number of elements in the input array  
$m$ - number of integer values in the range $[\min{T}, \max{T}]$

Create a counter for each value in $[\min{T}, \max{T}]$.

In [15]:
def kth_largest2(T, k):
    """Return the ``k``-th largest element of the integer sequence ``T``.

    Counting approach: one counter per integer in ``[min(T), max(T)]``; the
    counters are then consumed from the largest value downwards until ``k``
    elements have been passed.

    If ``k`` is greater than ``len(T)`` there is no such element and ``-inf``
    is returned, the same result ``kth_largest`` gives for that case.
    ``k`` is expected to be at least 1.
    """
    if k > len(T):
        # Without this guard the downward walk below would run past index 0
        # and continue with negative (wrapped-around) indices.
        return float("-inf")

    min_, max_ = fast_min_max(T)
    cnts = [0] * (max_ - min_ + 1)
    for e in T:
        cnts[e - min_] += 1  # shift so that min_ maps to slot 0

    i = len(cnts) - 1
    while cnts[i] < k:
        k -= cnts[i]
        i -= 1

    return i + min_

Test:

In [16]:
# Randomised cross-check of the two k-th largest implementations for every
# valid k. Disagreements are printed together with the input.
# NOTE: the generated arrays are sorted ascending before use, so only
# ascending inputs are exercised here.
for _ in range(10):
    T = sorted(randint(-20, 20) for _ in range(randint(1, 100)))
    for k in range(1, len(T) + 1):
        a, b = kth_largest(T, k), kth_largest2(T, k)
        if a != b:
            print(a, b, k, T)

### Quickselect $O(n^2)$

Average $O(n)$; the quadratic bound is the worst case.

(...) It is related to the quicksort sorting algorithm. Like quicksort, it was developed by Tony Hoare, and thus is also known as Hoare's selection algorithm. Like quicksort, it is efficient in practice and has good average-case performance, but has poor worst-case performance. Quickselect and its variants are the selection algorithms most often used in efficient real-world implementations.

![](img/quickselect.gif)

In [17]:
def partition(T, l, r):
    """Partition ``T[l:r]`` in place around the pivot ``T[l]``.

    Returns the pivot's final index ``j``. Afterwards, within the window:
    every element left of ``j`` is smaller than the pivot and every element
    right of ``j`` is greater than or equal to it. Elements outside
    ``[l, r)`` are neither read nor moved.
    """
    v = T[l]

    i, j = l, r
    while i < j:
        # Advance i to the next element that is not smaller than the pivot,
        # staying inside the window.
        i += 1
        while i < r and T[i] < v:
            i += 1

        # Move j down to the next element smaller than the pivot; it stops
        # at l (the pivot's own slot) at the latest.
        j -= 1
        while j > l and T[j] >= v:
            j -= 1

        if i < j:
            T[i], T[j] = T[j], T[i]

    # j now marks the last slot of the "smaller" side: put the pivot there.
    T[l], T[j] = T[j], T[l]
    return j

def quick_select(T, k):
    """Return the ``k``-th largest element of ``T`` (``k`` = 1 is the maximum).

    Repeatedly partitions the window that must contain the answer until a
    pivot lands exactly on index ``len(T) - k``, the position the k-th
    largest element has in ascending order. ``T`` is reordered in place.

    If ``k`` is greater than ``len(T)`` there is no such element and ``-inf``
    is returned, the same result ``kth_largest`` gives for that case.
    ``k`` is expected to be at least 1.
    """
    if k > len(T):
        # The target index would be negative and could never be reached,
        # so the loop below would not terminate.
        return float("-inf")

    target = len(T) - k
    l, r = 0, len(T)
    while True:
        c = partition(T, l, r)
        if c == target:
            return T[c]
        if c < target:
            l = c + 1  # answer lies right of the pivot
        else:
            r = c      # answer lies left of the pivot

Test:

In [22]:
# Randomised cross-check of quick_select against kth_largest for every
# valid k. Disagreements are printed together with the input.
# NOTE: the generated arrays are sorted ascending before use.
for _ in range(100):
    T = sorted(randint(-20, 20) for _ in range(randint(1, 10)))
    for k in range(1, len(T) + 1):
        a, b = kth_largest(T, k), quick_select(T, k)
        if a != b:
            print(a, b, k, T)

## Median

### $O(n\log{n})$

Using sorting.

### Average $O(n)$

Using *Quick Select*, which is usually faster than sorting.