In [4]:
import numpy as np

def generate_balanced_thresholds(arr, N):
    """
    Generates up to N-1 thresholds that split an array of integers into
    groups of roughly equal size.

    The k-th threshold (k = 1 .. N-1) is the smallest distinct value whose
    cumulative count reaches k/N of all samples. Boundaries are measured
    against the running total, so a group that overshoots its share does
    not shrink the groups that follow it. Each threshold is the largest
    value belonging to its group (an inclusive upper bound).

    Parameters:
    - arr (np.ndarray): The input array of integers in range [0, L-1].
      Any shape is accepted; every element is counted.
    - N (int): The number of desired groups.

    Returns:
    - thresholds (list): Strictly increasing list of at most N-1 threshold
      values, as native Python scalars. Fewer than N-1 values are returned
      when a single value spans several boundaries or when arr is empty.

    Raises:
    - ValueError: If N is not greater than 1.
    """
    if N <= 1:
        raise ValueError("N must be greater than 1.")

    # np.unique flattens its input and returns the distinct values in
    # ascending order together with how often each one occurs.
    values, counts = np.unique(arr, return_counts=True)

    # Sum the counts instead of using len(arr): len() only measures the
    # first axis and would disagree with the counts for N-d input.
    total_samples = int(counts.sum())

    thresholds = []
    cum_count = 0
    boundary = 1  # index k of the next boundary, located at k * total / N

    for value, count in zip(values, counts):
        cum_count += int(count)

        # Cross-multiplied form of `cum_count >= boundary * total / N`,
        # which keeps the comparison in exact integer arithmetic.
        if cum_count * N < boundary * total_samples:
            continue

        # .item() yields a native Python scalar rather than a NumPy one.
        thresholds.append(value.item())

        # A frequent value may cross several boundaries at once; skip all
        # of them so the same value is never emitted twice.
        while boundary < N and cum_count * N >= boundary * total_samples:
            boundary += 1

        # All N-1 boundaries have been placed.
        if boundary >= N:
            break

    return thresholds


def assign_groups(arr, thresholds):
    """
    Labels every element of an array with the index of its group.

    All elements start in group 0. The thresholds are then visited in the
    order given, and every element strictly greater than the i-th
    threshold (0-based) is relabelled as group i + 1. Later thresholds
    overwrite earlier ones, so the thresholds are expected in ascending
    order.

    Parameters:
    - arr (np.ndarray): The input array of integers.
    - thresholds (list): The list of threshold values.

    Returns:
    - group_arr (np.ndarray): Array with the same shape and dtype as arr,
      holding group labels from 0 to len(thresholds).
    """
    labels = np.zeros_like(arr)

    for group_id, cut in enumerate(thresholds, start=1):
        # Elements equal to the cut stay in the lower group.
        above_cut = arr > cut
        labels[above_cut] = group_id

    return labels


# Example usage: split a small sample array of 14 integers into N = 3 groups.
arr = np.array([0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 9])
N = 3
# Derive the cut points from the data, then label each element with its group.
thresholds = generate_balanced_thresholds(arr, N)
group_arr = assign_groups(arr, thresholds)

# Show the chosen cut points and the per-element group labels.
print(f"Thresholds: {thresholds}")
print(f"Grouped Array: {group_arr}")


Thresholds: [3, 7]
Grouped Array: [0 0 0 0 0 1 1 1 1 1 2 2 2 2]
