In [6]:

import numpy as np
from os import makedirs
from os.path import join, exists

# --- Input / output locations -------------------------------------------
out_dir = "/home/fabio/work/HM_and_AI_models/VAE_Model"
out_fname = "combined_shuffled.npy"
file_path = "/home/fabio/work/HM_and_AI_models/VAE_Model/x_stoch.npy"

# --- Window (first axis) searched for transition rows --------------------
# Both indices are inclusive; the end index is clipped to the last row
# of the loaded array before slicing.
left_limit = 0
right_limit = 299000

# --- Transition criterion -------------------------------------------------
# A row is selected when any of its values in column 63 lies strictly
# between these two bounds.  Adjust as needed.
lower_bound = 5
upper_bound = 53.8 / 5

# --- Oversampling and shuffling ------------------------------------------
# Number of copies of every selected row appended to the original data.
duplicate_factor = 1
# Seed for the shuffle; use an int for reproducible output, None otherwise.
random_seed = None


# def main() -> None:
#     # 2. load & time‑slice
#     data = np.load(file_path)
#     print("[INFO] original shape:", data.shape)

#     # clip limits so we never go out of bounds
#     right = min(right_limit, data.shape[0] - 1)
#     sliced = data[left_limit : right + 1]      

#     # boolean mask
#     col63        = sliced[:, :, 63]                
#     in_range     = (lower_bound < col63) & (col63 < upper_bound)
#     transition_mask = in_range.any(axis=1)        

#     filtered = sliced[transition_mask]
#     print(f"[INFO] selected {filtered.shape[0]} transition slices")

#     dup_array = np.repeat(filtered, duplicate_factor, axis=0)
#     print(f"[INFO] after duplication: {dup_array.shape[0]} extra rows")

#     combined = np.concatenate((data, dup_array), axis=0)
#     print("[INFO] combined shape (before shuffle):", combined.shape)

#     if random_seed is not None:
#         np.random.seed(random_seed)
#     perm      = np.random.permutation(combined.shape[0])
#     shuffled  = combined[perm]
#     print("[INFO] final shuffled shape:", shuffled.shape)

#     # 7. save
#     if not exists(out_dir):
#         makedirs(out_dir)
#     out_path = join(out_dir, out_fname)
#     np.save(out_path, shuffled)
#     print(f"[DONE] saved to {out_path}")

def _shuffle_in_blocks(
    rows: np.ndarray, block_size: int, rng: np.random.Generator
) -> np.ndarray:
    """Return a copy of *rows* with whole blocks of ``block_size`` rows permuted.

    Rows inside a block keep their relative order.  Rows after the last
    complete block are appended, in their original order, at the end.
    """
    n_blocks = rows.shape[0] // block_size
    split = n_blocks * block_size          # first row of the incomplete tail
    trailing = rows.shape[1:]

    order = rng.permutation(n_blocks)
    blocks = rows[:split].reshape(n_blocks, block_size, *trailing)
    body = blocks[order].reshape(split, *trailing)

    # Always concatenate: an empty tail is a no-op, and a non-empty tail is
    # never lost (a ``.size`` check would miss it if a trailing dim were 0).
    return np.concatenate((body, rows[split:]), axis=0)


def main(*, block_size: int = 6_000) -> None:
    """Oversample transition rows, block-shuffle the result and save it.

    All settings except ``block_size`` are read from the module-level
    configuration (``file_path``, ``out_dir``, ``out_fname``, ``left_limit``,
    ``right_limit``, ``lower_bound``, ``upper_bound``, ``duplicate_factor``,
    ``random_seed``).

    Steps:
      1. Load the array stored at ``file_path``.
      2. Inside the window ``[left_limit, right_limit]`` of the first axis,
         select every row for which any value in column 63 lies strictly
         between ``lower_bound`` and ``upper_bound``.
      3. Repeat each selected row ``duplicate_factor`` times and append the
         copies to the *full* loaded array (the window only restricts the
         search in step 2, not what is written out).
      4. Permute the result in contiguous blocks of ``block_size`` rows; an
         incomplete last block stays at the end, unshuffled.
      5. Save the result to ``out_dir/out_fname``, creating ``out_dir`` if
         necessary.

    Args:
        block_size: Number of consecutive rows that are moved together when
            shuffling.  Defaults to 6000.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
    """
    if block_size < 1:
        raise ValueError(f"block_size must be a positive integer, got {block_size}")

    rng = np.random.default_rng(random_seed)

    # 1. load
    data = np.load(file_path)
    print("[INFO] original shape:", data.shape)

    # 2. restrict the search window; clip so the inclusive end stays in bounds
    right = min(right_limit, data.shape[0] - 1)
    sliced = data[left_limit : right + 1]

    col63 = sliced[:, :, 63]
    in_range = (lower_bound < col63) & (col63 < upper_bound)
    transition_mask = in_range.any(axis=1)   # one flag per row of the window

    filtered = sliced[transition_mask]
    print(f"[INFO] selected {filtered.shape[0]} transition slices")

    # 3. duplicate the selected rows and stack them after the originals
    dup_array = np.repeat(filtered, duplicate_factor, axis=0)
    print(f"[INFO] after duplication: {dup_array.shape[0]} extra rows")

    combined = np.concatenate((data, dup_array), axis=0)
    print("[INFO] combined shape (pre‑shuffle):", combined.shape)

    # 4. shuffle whole blocks; rows within a block stay contiguous
    shuffled = _shuffle_in_blocks(combined, block_size, rng)
    print("[INFO] final shuffled shape:", shuffled.shape)

    # 5. save (exist_ok avoids the check-then-create race)
    makedirs(out_dir, exist_ok=True)
    out_path = join(out_dir, out_fname)
    np.save(out_path, shuffled)
    print(f"[DONE] saved to {out_path}")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


[INFO] original shape: (299400, 2, 75)
[INFO] selected 35640 transition slices
[INFO] after duplication: 35640 extra rows
[INFO] combined shape (pre‑shuffle): (335040, 2, 75)
[INFO] final shuffled shape: (335040, 2, 75)
[DONE] saved to /home/fabio/work/HM_and_AI_models/VAE_Model/combined_shuffled.npy
