In [1]:
from pathlib import Path
import json

import h5py
import numpy as np
from tqdm import tqdm

from core.simulate import simulate_rir


# Seed NumPy's legacy global RNG up front so that every rerun of the notebook
# draws the same sequence from np.random.
# NOTE(review): this only makes the generated dataset reproducible if
# simulate_rir samples from np.random's global state -- confirm.
np.random.seed(seed=42)

In [2]:
# ------------------------------
# Configuration
# ------------------------------
# FS is not referenced anywhere else in this cell.
# NOTE(review): presumably the sampling rate in Hz -- confirm against simulate_rir.
FS = 16_000

# Number of valid RIRs to collect; also the row count of every preallocated array.
N_SAMPLES = 50_000

# Fixed row length (8192): each RIR is zero-padded or truncated to this length.
MAX_RIR_LEN = 2 ** 13

# Destination HDF5 file, relative to the notebook's working directory.
OUTPUT_PATH = "data/rir_dataset.h5"

# ------------------------------
# Preallocate arrays
# ------------------------------
# Every buffer has one row per requested RIR and starts zero-filled; the
# generation loop fills row `valid_count` in place.

# Impulse responses, one fixed-length row each.
rirs = np.zeros(shape=(N_SAMPLES, MAX_RIR_LEN), dtype="float32")

# Three-component float vectors taken from the simulator's metadata
# (keys "room_dim", "source_pos", "mic_pos"); each gets its own buffer.
room_dims, source_pos, mic_pos = (
    np.zeros(shape=(N_SAMPLES, 3), dtype="float32") for _ in range(3)
)

# Scalar metadata per RIR.
absorption = np.zeros(shape=N_SAMPLES, dtype="float32")
max_order = np.zeros(shape=N_SAMPLES, dtype="int32")

# ------------------------------
# Generate RIRs
# ------------------------------
# Keep calling simulate_rir() until N_SAMPLES valid responses are stored.
#
# Failed trials never advance valid_count, so without an upper bound a
# simulator that fails on every call would keep this loop spinning forever.
# MAX_TRIALS caps the total number of attempts; if the cap is hit, the loop
# stops early and valid_count (< N_SAMPLES) tells the trimming step below how
# many rows are actually filled.
MAX_TRIALS = 10 * N_SAMPLES

valid_count = 0
trial = 0

# The context manager closes the progress bar even if the loop is interrupted
# (e.g. KeyboardInterrupt), which a trailing progress.close() would miss.
with tqdm(total=N_SAMPLES, desc="Simulating RIRs") as progress:
    while valid_count < N_SAMPLES and trial < MAX_TRIALS:
        try:
            rir, meta = simulate_rir()
            rir = np.asarray(rir, dtype=np.float32)

            # Force the response to exactly MAX_RIR_LEN entries:
            # cut the tail if too long, zero-pad at the end if too short.
            if len(rir) > MAX_RIR_LEN:
                rir = rir[:MAX_RIR_LEN]
            else:
                rir = np.pad(rir, (0, MAX_RIR_LEN - len(rir)))

            # If any of these writes raises, the row may be left partially
            # filled; valid_count is not advanced, so the same row is reused
            # by the next successful trial.
            rirs[valid_count] = rir
            room_dims[valid_count] = meta["room_dim"]
            absorption[valid_count] = meta["absorption"]
            source_pos[valid_count] = meta["source_pos"]
            mic_pos[valid_count] = meta["mic_pos"]
            max_order[valid_count] = meta["max_order"]

            valid_count += 1
            progress.update(1)

        except Exception as e:
            # Deliberate best-effort: a failed simulation is skipped, not fatal.
            # tqdm.write prints without corrupting the active progress bar.
            tqdm.write(f"Skipping trial {trial}: {e}")

        trial += 1

if valid_count < N_SAMPLES:
    print(
        f"Stopped after {trial} trials with only {valid_count}/{N_SAMPLES} "
        "valid RIRs; rows beyond valid_count are unfilled."
    )

# ------------------------------
# Trim arrays to actual count
# ------------------------------
# Keep only the first `valid_count` rows of every buffer. The right-hand side
# is evaluated lazily against the original arrays during unpacking, so each
# name is rebound to a slice of its own buffer.
rirs, room_dims, absorption, source_pos, mic_pos, max_order = (
    buffer[:valid_count]
    for buffer in (rirs, room_dims, absorption, source_pos, mic_pos, max_order)
)

# ------------------------------
# Write to HDF5
# ------------------------------
# h5py creates the file itself but not missing parent directories. Create the
# target directory first so a fresh checkout (no "data/" folder yet) does not
# fail here and discard the whole simulation run.
Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)

# Mode "w" creates the file, truncating any existing file at OUTPUT_PATH.
# Each array is stored as its own top-level dataset; row i of every dataset
# describes the same RIR.
with h5py.File(OUTPUT_PATH, "w") as f:
    f.create_dataset("rirs", data=rirs)
    f.create_dataset("room_dims", data=room_dims)
    f.create_dataset("absorption", data=absorption)
    f.create_dataset("source_pos", data=source_pos)
    f.create_dataset("mic_pos", data=mic_pos)
    f.create_dataset("max_order", data=max_order)


Simulating RIRs: 100%|██████████| 50000/50000 [04:23<00:00, 189.97it/s]
