# Fractional Cascading

## Problem Statement

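Given `k` sorted lists of integers and a query value `x`, return the largest value less than or equal to `x` in each of the `k` lists. The functions below report each answer as a position: the `bisect.bisect_left` insertion index of `x` in the list (the index of the first element greater than or equal to `x`), so when `x` itself is absent the largest smaller value is the element just before that index.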

In [1]:
import bisect

In [2]:
# Sample input: four sorted lists of integers (k = 4).
arr = [
    [24, 64, 65, 80, 93],
    [23, 25, 26],
    [13, 44, 62, 66],
    [11, 35, 46, 79, 81],
]

# Sample query value passed to each lookup function in this notebook.
r = 80

## Slower Queries Less Space | Naive Way - `k` binary searches

In [3]:
def get_locations_1(x, lists=None):
    """Locate ``x`` in every list with one independent binary search per list.

    Args:
        x: Query value.
        lists: Sorted lists to search. When omitted, the notebook-level
            ``arr`` is used, which keeps existing one-argument calls working.

    Returns:
        A new list with one entry per searched list: the
        ``bisect.bisect_left`` insertion index of ``x`` in that list.
    """
    if lists is None:
        lists = arr
    return [bisect.bisect_left(sorted_list, x) for sorted_list in lists]

In [4]:
# Run the naive per-list lookup for the sample query `r`.
get_locations_1(r)

[3, 3, 4, 4]

## Faster Queries More Space | Global Ordering

In [5]:
# positions[i][j] is a list with one slot per list in `arr`; slot k is meant to
# hold the index at which the element arr[i][j] would be inserted into arr[k].
# Every slot starts at the placeholder -1.
positions = [[[-1] * len(arr) for _ in row] for row in arr]

In [6]:
# Fill the table: binary-search every element of every list against each list.
for row_idx, row in enumerate(arr):
    for col_idx, element in enumerate(row):
        # Overwrite the slots in place so the existing inner list object is kept.
        positions[row_idx][col_idx][:] = [
            int(bisect.bisect_left(target, element)) for target in arr
        ]

See where `79` (4th list, 4th element, i.e. `arr[3][3]`) would be inserted in each of the lists:

In [7]:
# Insertion indices of arr[3][3] in each of the lists of `arr`.
positions[3][3]

[3, 3, 4, 3]

In [8]:
# Flatten the table into (value, insertion indices) pairs, one per element of
# `arr`, then order the pairs by value.
pos_arr = [
    (value, positions[row_idx][col_idx])
    for row_idx, row in enumerate(arr)
    for col_idx, value in enumerate(row)
]
pos_arr.sort(key=lambda pair: pair[0])

In [9]:
# Peek at the first five (value, insertion indices) pairs.
pos_arr[:5]

[(11, [0, 0, 0, 0]),
 (13, [0, 0, 0, 1]),
 (23, [0, 0, 1, 1]),
 (24, [0, 1, 1, 1]),
 (25, [1, 1, 1, 1])]

In [10]:
# U is the tuple of values from `pos_arr`, in the same order, used as the
# search key for bisect. Built directly from the pairs so that an empty
# `pos_arr` yields an empty tuple instead of raising IndexError.
U = tuple(value for value, _ in pos_arr)

In [11]:
def get_locations_2(x):
    """Locate ``x`` in every list with a single binary search over ``U``.

    Relies on the notebook-level ``U`` being the sorted values of ``pos_arr``
    and on each ``pos_arr[i][1]`` holding the per-list insertion indices of
    that value.

    Args:
        x: Query value.

    Returns:
        A new list with one insertion index per list in ``arr``. When ``x`` is
        greater than every value in ``U`` the length of each list is returned.
    """
    index = bisect.bisect_left(U, x)
    if index == len(pos_arr):
        return [len(l) for l in arr]
    # Return a copy: the stored list is shared with the lookup table, so handing
    # it out directly would let a caller corrupt later queries by mutating it.
    return list(pos_arr[index][1])

In [12]:
# Run the global-ordering lookup for the sample query `r`.
get_locations_2(r)

[3, 3, 4, 4]

## Best of both Worlds | Fractional Cascading

In [13]:
# Sentinel bounds placed at both ends of every augmented list.
# NOTE(review): assumed to lie strictly outside the range of the data - confirm for new inputs.
MIN_VAL = -(10 ** 9)
MAX_VAL = 10 ** 9

In [14]:
# Augmented lists for fractional cascading; starts empty and is filled by inserting at the front.
m_arr = []

In [15]:
# The last augmented list is a plain copy of the last input list.
m_arr.insert(0, list(arr[-1]))

In [16]:
# Walk the remaining input lists from second-to-last down to the first. Each
# new augmented list merges the input list with every second element (odd
# indices) of the augmented list built just before it.
for level in reversed(range(len(arr) - 1)):
    sampled = m_arr[0][1::2]
    m_arr.insert(0, sorted(sampled + arr[level]))

In [17]:
# Bracket every augmented list with the MIN_VAL / MAX_VAL sentinels.
# NOTE: the lists are mutated in place, so re-running this cell on its own
# adds a second pair of sentinels.
for augmented in m_arr:
    augmented[:0] = [MIN_VAL]
    augmented += [MAX_VAL]

In [18]:
# Display the augmented lists.
m_arr

[[-1000000000, 24, 25, 35, 64, 65, 79, 80, 93, 1000000000],
 [-1000000000, 23, 25, 26, 35, 62, 79, 1000000000],
 [-1000000000, 13, 35, 44, 62, 66, 79, 1000000000],
 [-1000000000, 11, 35, 46, 79, 81, 1000000000]]

In [19]:
# pointers[i][j] is a two-slot record for the element m_arr[i][j], meant to
# hold [insertion index in arr[i], insertion index in m_arr[i + 1]].
# Both slots start at the placeholder -1.
pointers = [[[-1] * 2 for _ in level] for level in m_arr]

In [20]:
# For every element of every augmented list record two bisect_left indices:
# its position in the matching input list and its position in the next
# augmented list (0 for the last level, which has no next list).
last_level = len(m_arr) - 1
for level, values in enumerate(m_arr):
    for slot, value in enumerate(values):
        own_index = bisect.bisect_left(arr[level], value)
        if level == last_level:
            down_index = 0
        else:
            down_index = bisect.bisect_left(m_arr[level + 1], value)
        pointers[level][slot] = [own_index, down_index]

In [21]:
# Display the pointer records of the first three levels.
pointers[:3]

[[[0, 0],
  [0, 2],
  [1, 2],
  [1, 4],
  [1, 6],
  [2, 6],
  [3, 6],
  [3, 7],
  [4, 7],
  [5, 7]],
 [[0, 0], [0, 2], [1, 2], [2, 2], [3, 2], [3, 4], [3, 6], [3, 7]],
 [[0, 0], [0, 2], [1, 2], [1, 3], [2, 4], [3, 4], [4, 4], [4, 6]]]

In [22]:
def get_locations_3(x):
    """Locate ``x`` in every list using the fractional-cascading tables.

    One binary search is done on ``m_arr[0]``; every later level is reached by
    following the stored pointer and checking at most one neighbouring slot.

    Relies on the notebook-level ``m_arr`` (augmented lists bracketed by
    sentinels) and ``pointers`` (per-element ``[index in arr[i], index in
    m_arr[i + 1]]`` records).

    Args:
        x: Query value. Values at or beyond the sentinels are handled: they
            resolve to the first or last slot of every level.

    Returns:
        A new list with one insertion index per level, equal to
        ``bisect.bisect_left(arr[i], x)`` as long as the data lies strictly
        between the sentinels.
    """
    if not m_arr:
        return []

    # Clamp to the last slot (the upper sentinel) so a query above it does not
    # index past the end of the pointer table.
    start = min(bisect.bisect_left(m_arr[0], x), len(m_arr[0]) - 1)
    loc, next_loc = pointers[0][start]
    locations = [loc]

    for level in range(1, len(m_arr)):
        # The pointer lands on the right slot or one past it. Step back only
        # when a previous slot exists; without the `next_loc > 0` guard, index
        # -1 would wrap around to the upper sentinel and give a wrong answer
        # for queries at or below the lower sentinel.
        if next_loc > 0 and x <= m_arr[level][next_loc - 1]:
            next_loc -= 1
        loc, next_loc = pointers[level][next_loc]
        locations.append(loc)
    return locations

In [23]:
# Run the fractional-cascading lookup for the sample query `r`.
get_locations_3(r)

[3, 3, 4, 4]