In [1]:
import random
import os

In [None]:
def gen_grouped_sparse_vector(sparsity, N, group=2, group_sparsity=50, seed=None):
    """
    Generate a 1D sparse vector with exact targets for:
      - sparsity (% of zeros over N)
      - group_sparsity (% of zeros that belong to runs of length >= group)
    Strategy:
      1) Place exact number of non-overlapping groups (length = group) first, with 1-cell guard if possible
      2) Place remaining single zeros avoiding creating new runs of length >= group
         (gradually relax constraints if space runs out)
    """
    if seed is not None:
        random.seed(seed)

    total_zero = round(N * sparsity / 100.0)
    total_zero = max(0, min(N, total_zero))

    target_group_zero = round(total_zero * group_sparsity / 100.0)
    target_group_zero = max(0, min(total_zero, target_group_zero))

    # Exact number of groups (each of length = group)
    num_groups = 0 if group <= 0 else min(target_group_zero // group, N // max(1, group))
    # Recompute exact grouped zeros we will place
    grouped_zero_to_place = num_groups * group
    single_zero_to_place = total_zero - grouped_zero_to_place

    vec = [None] * N  # None = not assigned yet

    # --- Helper to check if placing a group at 'start' is valid (no overlap, keep 1-cell guard if possible) ---
    def can_place_group(start):
        # range [start, start+group-1] must be inside vector
        if start < 0 or start + group > N:
            return False
        # check overlap
        for i in range(start, start + group):
            if vec[i] == 0:
                return False
        # guard: try to keep 1 non-zero cell at both ends to avoid merging
        if start - 1 >= 0 and vec[start - 1] == 0:
            return False
        if start + group < N and vec[start + group] == 0:
            return False
        return True

    # --- Place groups exactly ---
    candidate_starts = list(range(0, N - group + 1))
    random.shuffle(candidate_starts)

    placed_groups = 0
    for s0 in candidate_starts:
        if placed_groups >= num_groups:
            break
        if can_place_group(s0):
            for k in range(group):
                vec[s0 + k] = 0
            placed_groups += 1

    # If we failed to place enough groups due to tight N/constraints, fallback: place as many as possible
    grouped_zero_actual = sum(1 for v in vec if v == 0)
    # Note: grouped_zero_actual here counts zeros from groups only (since singles not placed yet)

    # --- Place single zeros while trying not to create new long runs (>= group) ---
    # We try to place singles in positions that do NOT extend any run to length >= group.
    def would_create_long_run(idx):
        # Temporarily assume vec[idx]=0 and check max run length around idx
        left = idx
        while left - 1 >= 0 and (vec[left - 1] == 0):
            left -= 1
        right = idx
        while right + 1 < N and (vec[right + 1] == 0):
            right += 1
        run_len = right - left + 1
        return run_len >= group

    # Pass 1: strict — place singles only where not creating runs >= group
    singles_to_place = single_zero_to_place
    indices = list(range(N))
    random.shuffle(indices)

    for i in indices:
        if singles_to_place == 0:
            break
        if vec[i] is None:
            if group <= 1:
                # trivial: any zero becomes a "group" when group==1; skip strict check
                vec[i] = 0
                singles_to_place -= 1
            else:
                if not would_create_long_run(i):
                    vec[i] = 0
                    singles_to_place -= 1

    # Pass 2: relaxed — if we still have singles left, allow creating groups but avoid merging two groups into longer ones if possible
    if singles_to_place > 0:
        for i in indices:
            if singles_to_place == 0:
                break
            if vec[i] is None:
                vec[i] = 0
                singles_to_place -= 1

    # Fill remaining None with random non-zeros
    for i in range(N):
        if vec[i] is None:
            vec[i] = random.uniform(-1.0, 1.0)

    return vec


def check_vector_sparsity(vec, group=2):
    """
    Measure:
      - sparsity (% of zeros)
      - group_sparsity (% of zeros that lie in runs of length >= group)
    """
    N = len(vec)
    zero_count = sum(1 for v in vec if v == 0)
    sparsity = 100.0 * zero_count / N if N > 0 else 0.0

    # Count zeros that belong to any run with length >= group
    grouped_zero = 0
    i = 0
    while i < N:
        if vec[i] == 0:
            j = i
            while j < N and vec[j] == 0:
                j += 1
            run_len = j - i
            if run_len >= group:
                grouped_zero += run_len
            i = j
        else:
            i += 1

    group_sparsity = 100.0 * grouped_zero / zero_count if zero_count > 0 else 0.0
    return sparsity, group_sparsity


# -------------------------
# Quick test
if __name__ == "__main__":
    v = gen_grouped_sparse_vector(sparsity=50, N=200, group=2, group_sparsity=5, seed=10)
    s, gs = check_vector_sparsity(v, group=2)
    print(f"sparsity={s:.2f}%, group_sparsity={gs:.2f}%")


sparsity=50.00%, group_sparsity=36.00%
