In [4]:
import math
import numpy as np

In [9]:
def sample_uniform(a, b, sample_size):
    """
    Draws samples uniformly from the interval between a and b.

    Parameters:
    a (float): Lower bound of the dist.
    b (float): Upper bound of the dist.
    sample_size (int): Number of samples to generate.

    returns:
    List of floats: Generated samples (empty when sample_size <= 0).
    """
    draws = []
    for _ in range(sample_size):
        # One unit-interval draw per sample, stretched by (b - a) and shifted by a.
        unit = np.random.rand()
        draws.append(a + (b - a) * unit)
    return draws


def sample_gaussian(mu, sigma, sample_size):
    """
    Generates a gaussian distribution of samples with mean as mu and standard deviation as sigma.

    Each pair of uniform draws (u1, u2) is turned into two normal deviates via
    r = sqrt(-2 * ln(u1)), theta = 2 * pi * u2, z0 = r * cos(theta), z1 = r * sin(theta).
    Samples are produced two at a time; a surplus one is dropped when
    sample_size is odd.

    Parameters:
    mu (float): Mean of the dist.
    sigma (float): Standard deviation of the dist.
    sample_size (int): Number of samples to generate.

    returns:
    List of floats: Generated samples (empty when sample_size <= 0).
    """
    samples = []
    while len(samples) < sample_size:
        u1 = np.random.rand()
        # rand() draws from [0, 1); an exact 0.0 would make math.log raise
        # "math domain error", so redraw until u1 is strictly positive.
        while u1 == 0.0:
            u1 = np.random.rand()
        u2 = np.random.rand()
        r = math.sqrt(-2 * math.log(u1))
        theta = 2 * math.pi * u2
        z0, z1 = r * math.cos(theta), r * math.sin(theta)
        samples.extend([mu + sigma * z0, mu + sigma * z1])
    # The loop adds samples in pairs, so trim to the requested count.
    return samples[:sample_size]


def sample_2d_gaussian(mu_2d, sigma_2d, sample_size):
    """
    Generates samples from a 2D gaussian with mean mu_2d and covariance matrix sigma_2d.

    Each sample is computed as mu_2d + L @ z, where L is the lower Cholesky
    factor of sigma_2d (L @ L.T == sigma_2d) and z holds independent standard
    normal deviates obtained from sample_gaussian.

    Parameters:
    mu_2d (array-like): Mean of the dist as (mu_x, mu_y).
    sigma_2d (array-like): 2x2 covariance matrix of the dist (NOT per-axis
        standard deviations). It is passed to np.linalg.cholesky, so it must
        be symmetric positive-definite.
    sample_size (int): Number of samples to generate.

    returns:
    np.ndarray: Array of shape (sample_size, 2); each row is one (x, y) sample.
        An empty request yields shape (0, 2).

    raises:
    np.linalg.LinAlgError: Propagated from np.linalg.cholesky when sigma_2d
        cannot be factorized.
    """
    mean = np.asarray(mu_2d, dtype=float)
    L = np.linalg.cholesky(sigma_2d)
    dim = L.shape[0]

    # Pre-size the output so that zero samples still has a well-formed 2D shape.
    samples = np.empty((max(sample_size, 0), dim))
    for row in range(sample_size):
        # One call returns `dim` independent standard normals; asking for them
        # together avoids throwing away half of every pair sample_gaussian makes.
        z = np.array(sample_gaussian(0, 1, dim))
        samples[row] = mean + L @ z
    return samples


def sample_discrete_stevens(pmf, sample_size):
    """
    Draws distinct indices from a discrete distribution, without replacement.

    Indices are drawn one at a time with probability proportional to their
    remaining weight; once an index is chosen its weight is set to zero so it
    cannot be chosen again. The caller's pmf is never modified: all bookkeeping
    happens on a private copy.

    Parameters:
    pmf (list or np.ndarray): Probability mass function as a 1-D sequence of
        non-negative probabilities summing to 1.
    sample_size (int): Number of samples to generate.

    returns:
    List of ints: Generated samples as indices of the pmf, in draw order
        (empty when sample_size <= 0).

    raises:
    ValueError: If pmf is not 1-D, has negative entries, does not sum to 1, or
        has fewer non-zero entries than sample_size.
    """
    # Work on a float copy so neither a list nor an array argument is mutated.
    weights = np.array(pmf, dtype=float)
    if weights.ndim != 1:
        raise ValueError("PMF must be one-dimensional")
    if np.any(weights < 0):
        raise ValueError("PMF entries must be non-negative")
    if not math.isclose(weights.sum(), 1.0):
        raise ValueError("PMF must sum to 1")
    if sample_size > np.count_nonzero(weights):
        raise ValueError(
            "sample_size exceeds the number of outcomes with non-zero probability"
        )

    selected_indices = []
    for _ in range(sample_size):
        cumulative = np.cumsum(weights)
        # Scale the draw by the remaining mass instead of renormalizing weights.
        u = np.random.rand() * cumulative[-1]
        # side="right" finds the first index whose cumulative mass is strictly
        # greater than u, so zero-weight (already drawn) indices are skipped.
        idx = int(np.searchsorted(cumulative, u, side="right"))
        if idx >= len(weights):
            # Floating-point rounding pushed u onto the total; use the last
            # index that still carries weight.
            idx = int(np.flatnonzero(weights)[-1])
        selected_indices.append(idx)
        weights[idx] = 0.0  # Remove the index from further draws.
    return selected_indices

In [12]:
# Exercise each sampler once and print a handful of draws.
print("Uniform Samples:", sample_uniform(a=0, b=10, sample_size=5))
print("Gaussian Samples:", sample_gaussian(mu=0, sigma=1, sample_size=5))

# Bivariate case: zero mean, unit variances, 0.5 covariance between x and y.
mu_2d = np.array([0, 0])
sigma_2d = np.array([[1, 0.5], [0.5, 1]])
print(
    "2D Gaussian Samples:",
    sample_2d_gaussian(mu_2d=mu_2d, sigma_2d=sigma_2d, sample_size=5),
)

# Random pmf over M outcomes, from which N indices are drawn.
M, N = 300, 20
pmf = np.random.rand(M)
pmf = pmf / sum(pmf)  # Normalize the pmf
print(
    "Discrete Without Replacement:",
    sample_discrete_stevens(pmf=pmf, sample_size=N),
)

Uniform Samples: [4.3183013250287665, 2.3665232815662005, 9.047716964797035, 0.2168556618508355, 6.509032347505657]
Gaussian Samples: [1.5398349919645868, -1.4267385667511858, -0.5748791860804163, 0.8890557028233477, 1.6750513109086274]
2D Gaussian Samples: [[-0.2964531   1.62983835]
 [ 1.18558458  0.94417965]
 [-0.11066802  0.05150721]
 [ 0.44194687 -0.2338797 ]
 [-0.94527056 -0.66917456]]
Discrete Without Replacement: [61, 154, 101, 259, 165, 111, 139, 151, 120, 190, 175, 100, 247, 78, 217, 176, 256, 240, 90, 251]
