In [None]:
import logging
import time

import numpy as np
from dask.distributed import Client, worker_client
import dask

# Named logger used by the demo functions for their messages.
logger = logging.getLogger("Dask Demo")
# Configure root logging at INFO level.
logging.basicConfig(level=logging.INFO)

In [None]:
# Create the Dask client; no scheduler address is passed here.
# NOTE(review): with no arguments this presumably starts a local cluster —
# confirm against the dask.distributed documentation.
client = Client()

# Report the dashboard link exposed by the client.
print(f"Connected to Dask at: {client.dashboard_link}")

In [None]:
# Threshold compared against a uniform random draw in
# check_divisibility_single; a draw below it raises a simulated failure.
failure_rate = 0
# Scale of the simulated delays: the default load latency (in seconds) and the
# multiplier applied to the random per-divisor sleep.
slowness = 1

In [None]:
def load_data(size: int, latency_s: float = slowness) -> np.ndarray:
    """Return the integers ``0 .. size - 1`` after a simulated fetch delay.

    Args:
        size: Number of consecutive integers to generate, starting at 0.
        latency_s: Seconds to sleep before returning, standing in for a
            remote-storage round trip. The default is the value ``slowness``
            had when this function was defined.

    Returns:
        An ``np.int64`` array holding ``0, 1, ..., size - 1``.
    """
    # Simulated remote-storage latency.
    time.sleep(latency_s)
    values = np.arange(0, size, dtype=np.int64)
    return values


def check_divisibility_single(data: np.ndarray, by: int) -> np.ndarray:
    """Return a boolean mask of the elements of ``data`` divisible by ``by``.

    The call may fail at random and always sleeps for a random duration to
    imitate an unreliable, slow unit of work.

    Args:
        data: Values to test.
        by: Divisor to test against; it also scales the simulated delay.

    Returns:
        Boolean array, ``True`` where ``data % by == 0``.

    Raises:
        ValueError: When a uniform random draw falls below the module-level
            ``failure_rate`` (simulated failure).
    """
    simulated_failure = np.random.random() < failure_rate
    if simulated_failure:
        logger.warning("Random failure occurred!")
        raise ValueError("Random failure occurred!")

    # Simulated work: sleep up to ``by * slowness`` seconds.
    delay_s = by * slowness * np.random.random()
    time.sleep(delay_s)

    remainders = data % by
    return remainders == 0


def check_divisibility(data: np.ndarray, divisors: np.ndarray) -> np.ndarray:
    """Return a mask of the elements of ``data`` divisible by every divisor.

    Each divisor is checked separately with ``check_divisibility_single`` and
    the per-divisor masks are combined with a logical AND.

    Args:
        data: Values to test.
        divisors: Divisors that an element must all divide by to be selected.

    Returns:
        Boolean array with the shape of ``data``. When ``divisors`` is empty
        every element qualifies, so the mask is all ``True``.

    Raises:
        ValueError: Propagated from ``check_divisibility_single`` when it
            simulates a failure.
    """
    parts = [check_divisibility_single(data, by) for by in divisors]
    if not parts:
        # Reducing an empty list yields the scalar identity ``True`` rather
        # than a mask; return an explicit all-True mask so callers can still
        # concatenate and index with the result.
        return np.ones(np.shape(data), dtype=bool)
    return np.logical_and.reduce(parts)


def is_divisible_by(
    data: np.ndarray, divisors: np.ndarray, num_chunks: int = 4
) -> np.ndarray:
    """Select the elements of ``data`` that are divisible by every divisor.

    The data is split into chunks, each chunk is checked independently, and
    the per-chunk masks are concatenated back into one mask over ``data``.

    Args:
        data: Data to check divisibility of.
        divisors: Divisors to check divisibility by.
        num_chunks: Number of chunks to split data into.

    Returns:
        The elements of ``data`` that are divisible by all ``divisors``. With
        no divisors, every element is returned.

    Raises:
        ValueError: Propagated from the per-chunk check when it simulates a
            failure.
    """
    # Split data into chunks (also validates ``num_chunks`` via NumPy).
    chunks = np.array_split(data, num_chunks)
    if np.size(divisors) == 0:
        # Nothing to test against: every element qualifies. Guarding here
        # avoids concatenating scalar results from an empty reduction.
        return data[np.ones(np.shape(data), dtype=bool)]
    # Check divisibility of each chunk
    parts = [check_divisibility(c, divisors) for c in chunks]
    return data[np.concatenate(parts, axis=0)]

In [None]:
# Demo inputs: every divisor must divide a value for it to be kept.
divisors = np.array((3, 5, 7))
# Number of pieces the data is split into before checking.
num_chunks = 5
# Number of consecutive integers to generate as input data.
size = 10_000

In [None]:
# Build the input array of `size` consecutive integers; load_data sleeps for
# its default latency before returning.
data = load_data(size)

In [None]:
# Keep only the values of `data` divisible by every entry of `divisors`,
# processing the data in `num_chunks` pieces.
result = is_divisible_by(data, divisors, num_chunks)

In [None]:
# Bare expression: shows the filtered values as the notebook cell's output.
result