This problem was asked by Facebook.

Given a stream of elements too large to store in memory, pick a random element from the stream with uniform probability.

In [10]:
import random as rnd

def sample(iterator):
    """Return one element of a stream, chosen uniformly at random.

    Single-pass sampling that keeps only the current candidate and a
    running count, so the stream never has to be stored.

    The i-th element (counting from 1) replaces the current candidate with
    probability 1/i. By induction this yields a uniform choice: after n
    elements the n-th one is held with probability 1/n, and each earlier
    element, held with probability 1/(n-1) after n-1 steps, survives step n
    with probability (n-1)/n, so it is held with probability
    1/(n-1) * (n-1)/n = 1/n.

    Args:
        iterator: Any iterable; it is traversed exactly once.

    Returns:
        The selected element, or None if the stream is empty.
    """
    out = None
    for i, elm in enumerate(iterator, start=1):
        # randrange(i) is uniform over the i integers 0..i-1, so it equals 0
        # with probability exactly 1/i. Using integers avoids the float
        # comparison against 1/i (and any dependence on true division).
        if rnd.randrange(i) == 0:
            out = elm
    return out

In [16]:
# Empirical check: draw 100000 independent samples from a 5-element stream.
vals = [sample(range(5)) for _ in range(100000)]

In [17]:
import numpy as np
# Tally how often each distinct value was drawn; return_counts=True makes
# np.unique return (sorted unique values, their counts).
np.unique(vals, return_counts=True)

(array([0, 1, 2, 3, 4]), array([19981, 20071, 20081, 20090, 19777]))