In [1]:
import os
import json
from pathlib import Path

import numpy as np
from PIL import Image
from tqdm import tqdm

import torch

from torchsig.datasets.dataset_metadata import DatasetMetadata
from torchsig.datasets.datasets import TorchSigIterableDataset
from torchsig.transforms.transforms import Spectrogram
from torchsig.transforms.metadata_transforms import YOLOLabel
from torchsig.utils.data_loading import WorkerSeedingDataLoader
from torchsig.utils.writer import DatasetCreator, default_collate_fn

In [None]:

# ============================================================
# 0. GLOBAL CONFIG
# ============================================================

BASE_ROOT = "./datasets/rf_benchmark"

# One output directory per dataset flavour, all located under BASE_ROOT.
RAW_ROOT  = os.path.join(BASE_ROOT, "raw_iq_hdf5")
SPEC_ROOT = os.path.join(BASE_ROOT, "spectrograms_hdf5")
YOLO_ROOT = os.path.join(BASE_ROOT, "spectrograms_yolo")

os.makedirs(RAW_ROOT, exist_ok=True)
os.makedirs(SPEC_ROOT, exist_ok=True)
os.makedirs(YOLO_ROOT, exist_ok=True)

TOTAL_SAMPLES = 1000
# Split sizes: 70 % train / 15 % val / remainder test.
# Integer arithmetic gives an exact floor (no float rounding surprises), and
# assigning the remainder to the test split guarantees that the three sizes
# always add up to TOTAL_SAMPLES, whatever value is chosen above.
N_TRAIN = TOTAL_SAMPLES * 70 // 100
N_VAL   = TOTAL_SAMPLES * 15 // 100
N_TEST  = TOTAL_SAMPLES - N_TRAIN - N_VAL

# Split name -> number of samples; iteration order (train, val, test) is
# relied upon by the per-split seed computation further down.
SPLITS = {
    "train": N_TRAIN,
    "val":   N_VAL,
    "test":  N_TEST,
}

# Signal / spectrogram configuration
fft_size = 64
# Each example holds fft_size squared IQ samples.
num_iq_samples_dataset = fft_size * fft_size
sample_rate = 1e7

# Inclusive bounds passed to DatasetMetadata for the number of signals per example.
num_signals_min, num_signals_max = 0, 5

# Use any class list you like - this is just an example
class_list = [
    "bpsk", "qpsk", "8psk",
    "16qam", "64qam",
    "2fsk", "4fsk",
    "am-dsb", "am-lsb",
    "fm",
]

# Base value from which every per-split seed is derived.
BASE_SEED = 123_456_789

# Shared DatasetMetadata: every dataset built in this notebook uses this object.
metadata = DatasetMetadata(
    class_list=class_list,
    fft_size=fft_size,
    num_iq_samples_dataset=num_iq_samples_dataset,
    sample_rate=sample_rate,
    num_signals_min=num_signals_min,
    num_signals_max=num_signals_max,
    snr_db_min=0,
    snr_db_max=30.0,
    impairment_level=2,
)

In [None]:

# ============================================================
# 1. UTILS
# ============================================================

def set_global_seeds(seed: int) -> None:
    """Seed the global NumPy and PyTorch random number generators with `seed`."""
    np.random.seed(seed)
    torch.manual_seed(seed)

def identity_collate(batch):
    """Collate function that performs no collation.

    Returns the very same `batch` object it was given, so the samples reach
    the consumer exactly as the dataset produced them.
    """
    return batch

def create_hdf5_split(
    split_name: str,
    num_samples: int,
    split_seed: int,
    out_root: str,
    transforms,
    batch_size: int = 11,
    num_workers: int = 4,
):
    """
    Generic helper: TorchSigIterableDataset -> WorkerSeedingDataLoader -> DatasetCreator
    to write an HDF5 dataset with `num_samples` examples.

    Args:
        split_name: Name of the split; also the sub-directory created under `out_root`.
        num_samples: Value passed to DatasetCreator as `dataset_length`.
        split_seed: Seed applied to the global RNGs and to the data loader.
        out_root: Root directory of this dataset flavour.
        transforms: Transform list handed to TorchSigIterableDataset
            (an empty list keeps the data untransformed).
        batch_size: Loader batch size (default 11, the value previously hard-coded).
        num_workers: Number of loader worker processes (default 4, as before).

    NOTE(review): when `num_samples` is not a multiple of `batch_size`, how the
    final partial batch is handled is decided inside DatasetCreator - confirm the
    written dataset length if an exact count matters.
    """
    split_root = os.path.join(out_root, split_name)
    os.makedirs(split_root, exist_ok=True)

    set_global_seeds(split_seed)

    ds = TorchSigIterableDataset(
        dataset_metadata=metadata,
        transforms=transforms,
        target_labels=None,      # keep full Signal+metadata internally
    )

    loader = WorkerSeedingDataLoader(
        ds,
        batch_size=batch_size,
        num_workers=num_workers,
        # Pass samples through untouched; no tensor stacking is attempted.
        collate_fn=identity_collate,
    )
    loader.seed(split_seed)

    creator = DatasetCreator(
        dataset_length=num_samples,
        dataloader=loader,
        root=split_root,
        overwrite=True,          # an existing split directory is replaced
        multithreading=False,
    )

    print(f"[HDF5] Creating {split_name} in {out_root} ({num_samples} samples)")
    creator.create()


def make_yolo_iterable_loader(seed: int) -> WorkerSeedingDataLoader:
    """
    Build a seeded, single-process loader over an iterable dataset whose
    transforms are Spectrogram followed by YOLOLabel and whose only target
    is "yolo_label". Used for the PNG + txt + JSON export (no HDF5).
    """
    yolo_transforms = [Spectrogram(fft_size=fft_size), YOLOLabel()]
    yolo_dataset = TorchSigIterableDataset(
        dataset_metadata=metadata,
        transforms=yolo_transforms,
        target_labels=["yolo_label"],
    )

    # One sample per batch, produced in the main process.
    yolo_loader = WorkerSeedingDataLoader(
        yolo_dataset,
        collate_fn=default_collate_fn,
        num_workers=0,
        batch_size=1,
    )
    yolo_loader.seed(seed)
    return yolo_loader


# ============================================================
# 2. RAW IQ HDF5 DATASET
# ============================================================

def create_raw_iq_hdf5():
    """Write every split in SPLITS as an untransformed (raw IQ) HDF5 dataset under RAW_ROOT."""
    for split_index, split_name in enumerate(SPLITS):
        # Consecutive splits get seeds that are 10 apart.
        seed_for_split = BASE_SEED + 10 * split_index
        create_hdf5_split(
            split_name=split_name,
            num_samples=SPLITS[split_name],
            split_seed=seed_for_split,
            out_root=RAW_ROOT,
            transforms=[],     # RAW IQ
        )


# ============================================================
# 3. SPECTROGRAM HDF5 DATASET (FOR GENERAL VISION MODELS)
# ============================================================

def create_spectrogram_hdf5():
    """Write every split in SPLITS as a spectrogram HDF5 dataset under SPEC_ROOT.

    Seeds come from a range reserved for this dataset flavour. The previous
    `BASE_SEED + 20 * i` scheme reused seeds of the other flavours: the train
    split shared BASE_SEED with the raw-IQ and YOLO train splits, and the val
    split shared BASE_SEED + 20 with the raw-IQ test split.
    """
    # Offset of 1000 keeps this flavour's seeds clear of the others; the stride
    # of 10 between splits matches the raw-IQ dataset.
    spec_seed_base = BASE_SEED + 1000
    for i, (split, n) in enumerate(SPLITS.items()):
        create_hdf5_split(
            split_name=split,
            num_samples=n,
            split_seed=spec_seed_base + 10 * i,
            out_root=SPEC_ROOT,
            transforms=[Spectrogram(fft_size=fft_size)],
        )


# ============================================================
# 4. YOLO/vision-FRIENDLY SPECTROGRAM DATASET (PNG + TXT + JSON)
# ============================================================

def _spectrogram_to_uint8(spec) -> np.ndarray:
    """Convert one spectrogram (NumPy array or torch tensor) to a uint8 image array.

    A 3-D input is reduced to its first slice along axis 0 (assumed to be the
    channel axis, i.e. (C, F, T) - TODO confirm). Values are min-max scaled to
    [0, 255]; a constant input yields an all-zero image.
    """
    if torch.is_tensor(spec):
        # Loader/collate may hand back tensors; move to host memory first.
        spec = spec.detach().cpu().numpy()
    spec = np.asarray(spec)

    if spec.ndim == 3:
        spec = spec[0]

    img = spec.astype(np.float32)   # astype copies, so the in-place ops below are safe
    img -= img.min()
    peak = img.max()
    if peak > 0:                    # avoid dividing by zero on a constant image
        img /= peak
    return (img * 255.0).astype(np.uint8)


def export_yolo_split(split_name: str, num_samples: int, split_seed: int):
    """
    Creates, for a given split:
      - images/*.png    (spectrograms)
      - labels/*.txt    (YOLO format: id xc yc w h)
      - annotations.json (all labels together)

    Args:
        split_name: Name of the split; output goes to YOLO_ROOT/<split_name>.
        num_samples: Number of samples to draw from the loader and export.
        split_seed: Seed applied to the global RNGs and to the data loader.
    """
    split_root = Path(YOLO_ROOT) / split_name
    img_dir = split_root / "images"
    lbl_dir = split_root / "labels"
    img_dir.mkdir(parents=True, exist_ok=True)
    lbl_dir.mkdir(parents=True, exist_ok=True)

    set_global_seeds(split_seed)
    loader = make_yolo_iterable_loader(seed=split_seed)

    annotations = {
        "split": split_name,
        "fft_size": fft_size,
        "num_iq_samples_dataset": num_iq_samples_dataset,
        "sample_rate": sample_rate,
        "images": [],
    }

    print(f"\n[YOLO] Exporting {split_name} to {split_root} ({num_samples} samples)")
    # zip() against range() stops after exactly `num_samples` items without
    # pulling (and discarding) one extra sample from the loader.
    for idx, (x, meta) in tqdm(
        zip(range(num_samples), loader),
        total=num_samples,
        desc=f"{split_name} export",
        leave=False,
    ):
        # ---- spectrogram -> PNG ----
        spec_img = _spectrogram_to_uint8(x[0])  # x[0]: remove batch dim

        h, w = spec_img.shape
        stem = f"{idx:06d}"
        Image.fromarray(spec_img).save(img_dir / f"{stem}.png")

        # ---- YOLO labels -> .txt + JSON ----
        yolo_list = meta["yolo_label"][0]          # list of tuples
        boxes = [
            (int(cid), float(xc), float(yc), float(bw), float(bh))
            for cid, xc, yc, bw, bh in (ann for ann in yolo_list if ann is not None)
        ]

        with open(lbl_dir / f"{stem}.txt", "w", encoding="utf-8") as f:
            # Same format as detector_example.ipynb
            f.writelines(
                f"{cid} {xc:.15f} {yc:.15f} {bw:.15f} {bh:.15f}\n"
                for cid, xc, yc, bw, bh in boxes
            )

        annotations["images"].append({
            "id": idx,
            "file_name": f"{stem}.png",
            "width": w,
            "height": h,
            "annotations": [
                {
                    "class_id": cid,
                    "x_center": xc,
                    "y_center": yc,
                    "width": bw,
                    "height": bh,
                }
                for cid, xc, yc, bw, bh in boxes
            ],
        })

    anno_path = split_root / "annotations.json"
    with open(anno_path, "w", encoding="utf-8") as f:
        json.dump(annotations, f, indent=2)

    print(f"[YOLO] Done {split_name}: {len(annotations['images'])} samples")


def create_yolo_spectrogram_dataset():
    """Export every split in SPLITS as a YOLO-style PNG + TXT + JSON dataset.

    Seeds come from a range reserved for this dataset flavour. The previous
    `BASE_SEED + 30 * i` scheme gave the train split the same seed (BASE_SEED)
    as the train splits of the HDF5 datasets.
    """
    # Offset of 2000 keeps this flavour's seeds clear of the others; the stride
    # of 10 between splits matches the raw-IQ dataset.
    yolo_seed_base = BASE_SEED + 2000
    for i, (split, n) in enumerate(SPLITS.items()):
        export_yolo_split(
            split_name=split,
            num_samples=n,
            split_seed=yolo_seed_base + 10 * i,
        )

In [19]:
# Build the three dataset flavours one after another, announcing each stage.
build_steps = (
    ("=== Creating RAW IQ HDF5 datasets ===", create_raw_iq_hdf5),
    ("\n=== Creating Spectrogram HDF5 datasets ===", create_spectrogram_hdf5),
    (
        "\n=== Creating YOLO / vision spectrogram dataset (PNG + TXT + JSON) ===",
        create_yolo_spectrogram_dataset,
    ),
)
for banner, build_dataset in build_steps:
    print(banner)
    build_dataset()

print("\nAll three dataset types created under:", BASE_ROOT)


=== Creating RAW IQ HDF5 datasets ===


0it [00:00, ?it/s]

[HDF5] Creating train in ./datasets/rf_benchmark/raw_iq_hdf5 (700 samples)
Deleted folder: datasets/rf_benchmark/raw_iq_hdf5/train


  0%|          | 0/64 [00:00<?, ?it/s]

0it [00:00, ?it/s]

[HDF5] Creating val in ./datasets/rf_benchmark/raw_iq_hdf5 (150 samples)
Deleted folder: datasets/rf_benchmark/raw_iq_hdf5/val


  0%|          | 0/14 [00:00<?, ?it/s]

0it [00:00, ?it/s]

[HDF5] Creating test in ./datasets/rf_benchmark/raw_iq_hdf5 (150 samples)
Deleted folder: datasets/rf_benchmark/raw_iq_hdf5/test


  0%|          | 0/14 [00:00<?, ?it/s]


=== Creating Spectrogram HDF5 datasets ===


0it [00:00, ?it/s]

[HDF5] Creating train in ./datasets/rf_benchmark/spectrograms_hdf5 (700 samples)
Deleted folder: datasets/rf_benchmark/spectrograms_hdf5/train


  0%|          | 0/64 [00:00<?, ?it/s]

0it [00:00, ?it/s]

[HDF5] Creating val in ./datasets/rf_benchmark/spectrograms_hdf5 (150 samples)
Deleted folder: datasets/rf_benchmark/spectrograms_hdf5/val


  0%|          | 0/14 [00:00<?, ?it/s]

0it [00:00, ?it/s]

[HDF5] Creating test in ./datasets/rf_benchmark/spectrograms_hdf5 (150 samples)
Deleted folder: datasets/rf_benchmark/spectrograms_hdf5/test


  0%|          | 0/14 [00:00<?, ?it/s]


=== Creating YOLO / vision spectrogram dataset (PNG + TXT + JSON) ===

[YOLO] Exporting train to datasets/rf_benchmark/spectrograms_yolo/train (700 samples)




AttributeError: 'numpy.ndarray' object has no attribute 'cpu'