In [None]:
### NEW PLAN

#### Make blocks of 6,000 rows from all the data
### The transition rows should be made into blocks of 100 rows (BLOCK_SMALL in the script below)
### The resulting blocks are then permuted throughout the entire time series
### I would also like to visualize the data sets: the data of the transition points,
### the data before the permutation with the transitions added, and the data
### after the permutation with the transitions included


In [1]:
#!/usr/bin/env python3
import numpy as np
import matplotlib.pyplot as plt
from os import makedirs
from os.path import join, exists

# ─── USER CONFIG ────────────────────────────────────────────────────────────
# Input array (read with np.load), output directory, and output file name.
# The PNG plots are written to OUT_DIR as well.
FILE_PATH   = "/home/fabio/work/HM_and_AI_models/VAE_Model/x_stoch.npy"
OUT_DIR     = "/home/fabio/work/HM_and_AI_models/VAE_Model"
OUT_FNAME   = "combined_shuffled_3.npy"

# Row range taken from the loaded array before any processing.
LEFT_LIMIT  = 0
RIGHT_LIMIT = 299_000          # inclusive; clamped to the last row in main()

# A row counts as a "transition" when column 63 lies strictly between these
# bounds (LOWER_BOUND < value < UPPER_BOUND) for at least one entry on axis 1.
LOWER_BOUND = 5
# NOTE(review): evaluates to ~10.76; the origin of 53.8 and of the divisor 5
# is not visible in this file -- confirm against the data's scaling.
UPPER_BOUND = 53.8 / 5

BLOCK_BIG       = 6_000        # rows per block of the original (sliced) data
BLOCK_SMALL     = 100          # rows per block of the duplicated transitions
DUPLICATE_FACTOR = 3           # repeat count passed to np.repeat per transition row
RANDOM_SEED      = None        # passed to np.random.default_rng; None = unseeded
VISUALIZE        = True        # when true, main() also writes the PNG plots
# ────────────────────────────────────────────────────────────────────────────


def _plot(col_data: np.ndarray, title: str, out_path: str) -> None:
    """Draw ``col_data`` as a thin line plot and write it to ``out_path``.

    Args:
        col_data: Values to plot against their index.
        title: Text shown as the figure title.
        out_path: Destination file for the rendered image (saved at 150 dpi).
    """
    fig, ax = plt.subplots(figsize=(10, 3))
    ax.plot(col_data, linewidth=0.6)
    ax.set_title(title)
    ax.set_xlabel("row index")
    ax.set_ylabel("col 63 (ch 0)")
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    # Close explicitly so repeated calls do not accumulate open figures.
    plt.close(fig)


def _blockify(arr: np.ndarray, size: int) -> tuple[list[np.ndarray], np.ndarray]:
    """Split ``arr`` along axis 0 into consecutive full blocks of ``size`` rows.

    Args:
        arr: Array to partition; only its first axis is split.
        size: Number of rows per block. Must be a positive integer.

    Returns:
        A ``(blocks, leftover)`` pair. ``blocks`` is a list of arrays of shape
        ``(size, *arr.shape[1:])`` in their original order; ``leftover`` holds
        the trailing ``arr.shape[0] % size`` rows and may be empty.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError(f"block size must be a positive integer, got {size!r}")

    n_blocks = arr.shape[0] // size
    usable = n_blocks * size
    # Pass the block count explicitly: reshape(-1, ...) cannot infer it when a
    # trailing dimension of ``arr`` has length 0.
    blocks = list(arr[:usable].reshape(n_blocks, size, *arr.shape[1:]))
    leftover = arr[usable:]        # may be empty
    return blocks, leftover


def main() -> None:
    """Oversample transition rows, block-shuffle the data, then save and plot it.

    All settings come from the module-level USER CONFIG constants.

    1. Load ``FILE_PATH`` and keep rows ``LEFT_LIMIT..RIGHT_LIMIT`` (inclusive,
       clamped to the last available row).
    2. Flag a row as a transition when column 63 lies strictly between
       ``LOWER_BOUND`` and ``UPPER_BOUND`` for at least one entry along axis 1.
    3. Repeat every transition row ``DUPLICATE_FACTOR`` times and pad the
       result, cycling through it from the start, up to a multiple of
       ``BLOCK_SMALL`` rows.
    4. Cut the slice into ``BLOCK_BIG``-row blocks and the duplicated
       transitions into ``BLOCK_SMALL``-row blocks, then permute all blocks
       together.
    5. Concatenate the permuted blocks; rows that did not fill a whole block
       are appended, unshuffled, at the end.
    6. Save the result to ``OUT_DIR/OUT_FNAME``.
    7. If ``VISUALIZE`` is true, write three PNG plots of column 63 (index 0
       along axis 1) to ``OUT_DIR``.
    """
    rng = np.random.default_rng(RANDOM_SEED)

    # 1. load & slice -------------------------------------------------------------
    data = np.load(FILE_PATH)
    right  = min(RIGHT_LIMIT, data.shape[0] - 1)
    sliced = data[LEFT_LIMIT : right + 1]
    print("[INFO] original slice:", sliced.shape)

    # 2. detect transition rows ---------------------------------------------------
    col63 = sliced[:, :, 63]
    mask  = ((LOWER_BOUND < col63) & (col63 < UPPER_BOUND)).any(axis=1)
    transitions = sliced[mask]
    print(f"[INFO] transition rows: {transitions.shape[0]}")

    # 3. duplicate & pad transitions to full small blocks -------------------------
    dup = np.repeat(transitions, DUPLICATE_FACTOR, axis=0)
    pad = (-dup.shape[0]) % BLOCK_SMALL
    if pad:
        # Wrap around when fewer than `pad` rows exist; a plain dup[:pad] would
        # come up short and leave a partial block behind.
        filler = np.take(dup, np.arange(pad), axis=0, mode="wrap")
        dup = np.concatenate((dup, filler), axis=0)
    print(f"[INFO] duplicated transitions (padded): {dup.shape}")

    # 4. build block lists --------------------------------------------------------
    big_blocks, big_tail   = _blockify(sliced, BLOCK_BIG)
    small_blocks, small_tail = _blockify(dup,    BLOCK_SMALL)

    all_blocks = big_blocks + small_blocks
    perm = rng.permutation(len(all_blocks))
    shuffled_blocks = [all_blocks[i] for i in perm]

    # 5. stitch everything back together -----------------------------------------
    # Build a separate list so the shuffled block list itself is not mutated.
    parts = list(shuffled_blocks)
    parts.extend(tail for tail in (big_tail, small_tail) if tail.size)
    shuffled = np.concatenate(parts, axis=0)
    print("[INFO] final shuffled:", shuffled.shape)

    # 6. save ---------------------------------------------------------------------
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    makedirs(OUT_DIR, exist_ok=True)
    np.save(join(OUT_DIR, OUT_FNAME), shuffled)
    print(f"[DONE] saved → {OUT_FNAME}")

    # 7. plots --------------------------------------------------------------------
    if VISUALIZE:
        _plot(transitions[:, 0, 63], "Transition rows (orig order)",
              join(OUT_DIR, "transitions.png"))
        # Join only the plotted 1-D traces instead of copying both full arrays.
        _plot(np.concatenate((sliced[:, 0, 63], dup[:, 0, 63]), axis=0),
              "Before shuffle (orig + dup)",
              join(OUT_DIR, "combined.png"))
        _plot(shuffled[:, 0, 63], "After shuffle (6 000 & 100‑row blocks mixed)",
              join(OUT_DIR, "shuffled.png"))
        print("[PLOTS] PNGs written to", OUT_DIR)


if __name__ == "__main__":
    main()


[INFO] original slice: (299001, 2, 75)
[INFO] transition rows: 35640
[INFO] duplicated transitions (padded): (107000, 2, 75)
[INFO] final shuffled: (406001, 2, 75)
[DONE] saved → combined_shuffled_3.npy
[PLOTS] PNGs written to /home/fabio/work/HM_and_AI_models/VAE_Model
