In [1]:
import numpy as np
from functools import reduce
from typing import TypeVar, Callable

# Generic type of the value that F(M) produces.
T = TypeVar('T')

def R(x: T, row: np.ndarray):
  """Reducer placeholder: combine the running value x with one row.

  Deliberately has no body in this cell; later cells rebind R to a concrete
  reducer before F is called.
  """

# Placeholder starting value for the fold; reassigned together with each
# concrete reducer.
initial_value: T = ...

def F(M: np.ndarray) -> T:
  """Fold M through R, one element of its first axis at a time.

  R and initial_value are looked up in module scope when F is called, so F
  uses whichever definitions were executed most recently. If M yields nothing,
  initial_value is returned unchanged.
  """
  reducer, accumulated = R, initial_value
  for row in M:
    accumulated = reducer(accumulated, row)
  return accumulated

In [2]:
# Fixed seed so A and B (and therefore M and every figure derived from it) are
# identical on each Restart & Run All; the unseeded global RNG gave new data on
# every run.
data_rng = np.random.default_rng(0)

# A is 10**6 x 5, with entries drawn uniformly from [0, 1).
A = data_rng.random(size=(10**6, 5))

# B is 5 x 10**3, drawn the same way.
B = data_rng.random(size=(5, 10**3))

# Full 10**6 x 10**3 product, materialised in memory.
M = np.matmul(A, B)

In [3]:
from numpy.random import default_rng

# Seeded so that the rows drawn by F_approx, and hence the approximate values
# printed by later cells, are the same on every fresh run of the notebook.
rng = default_rng(1)
def F_approx(A: np.ndarray, B: np.ndarray, sample_size: int) -> T:
  """Apply F to the product of a random subset of A's rows with B.

  sample_size entries along A's first axis are drawn without replacement using
  the module-level rng; only those rows are multiplied by B, and F is applied
  to that smaller product instead of the full A @ B.
  """
  sampled_rows = rng.choice(A, size=sample_size, replace=False)
  reduced_product = sampled_rows @ B
  return F(reduced_product)

In [4]:
# set up for computing average scores

# as an example, let's use the median as the score
def score(row: np.ndarray) -> float:
  """Score a single row; in this example the score is the row's median."""
  row_median = np.median(row)
  return row_median

def R(x: float, row: np.ndarray):
  """Reducer for a running total: add this row's score to x."""
  row_score = score(row)
  return x + row_score

# The running total of scores starts at zero.
initial_value: float = 0.0

# Exact average score: F(M) is the sum of score(row) over all rows of M under
# the summing reducer, so dividing by the row count gives the mean.
real_value = F(M) / len(M)
real_value

1.2674936944753523

In [5]:
# Compare the subsampled average with the exact one for several sample sizes.
# NOTE(review): the origin of these particular sample sizes is not shown here.
for s in [738, 1060, 18445]:
  # Under the summing reducer F_approx returns a total over s sampled rows;
  # dividing by s turns it into an average comparable to real_value.
  approx_value = F_approx(A, B, sample_size=s) / s
  print(f"s = {s}, approx: {approx_value}, difference={approx_value - real_value}")

s = 738, approx: 1.2768949729888694, difference=0.00940127851351713
s = 1060, approx: 1.2557004072894151, difference=-0.011793287185937196
s = 18445, approx: 1.265779680304528, difference=-0.001714014170824285


In [6]:
# set up for computing max of every column

def R(x: np.ndarray, row: np.ndarray):
  """Reducer for column maxima: element-wise larger of x and the incoming row."""
  larger = np.maximum(x, row)
  return larger

# A 1 x (number of columns of M) row of -inf, so that any finite entry replaces
# it under np.maximum.
initial_value: np.ndarray = np.full(shape=(1, M.shape[1]), fill_value=-np.inf)

In [7]:
def is_top_pth_percentile(sample_maxes: np.ndarray, M: np.ndarray, p: float) -> np.ndarray:
  """Per column, test whether fewer than a fraction p of M's entries beat sample_maxes.

  For each column, counts the entries of M strictly greater than the matching
  value in sample_maxes (broadcast against M), divides by the number of rows of
  M, and returns True where that fraction is strictly below p.
  """
  num_rows = M.shape[0]
  exceed_counts = np.count_nonzero(M > sample_maxes, axis=0)
  return exceed_counts / num_rows < p

In [8]:
# For each (sample size, fraction) pair, take the column maxima of a subsample
# and count the columns whose sampled maximum fails the top-p check against M.
for s, p in [(194, .05), (986, .01)]:
  sample_maxes = F_approx(A, B, sample_size=s)
  num_invalid = np.sum(np.logical_not(is_top_pth_percentile(sample_maxes, M, p)))
  print(f"s = {s}, number of columns not in top {p}'th percentile is {num_invalid}")

s = 194, number of columns not in top 0.05'th percentile is 0
s = 986, number of columns not in top 0.01'th percentile is 0
