In [1]:
# rename_and_collect_uavirbase_unique.py
import os, json, shutil, re
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm

# ---- Configuration --------------------------------------------------------
# Root folder that is walked for recordings, and the two flat output folders
# that receive the renamed drone / non-drone audio files.
SRC_ROOT = r"D:\FYP\Datasets\Drone Datasets\UAVirBase Public Access Drone Audio"
OUT_DRONE = r"D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Drone Audio"
OUT_NOISE = r"D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Noise Audio"

# True  -> copy each file and leave the source in place.
# False -> move each file out of the source tree.
KEEP_ORIGINAL = True
# NOTE(review): this flag is not read anywhere in this cell; destination
# names are made unique regardless of its value.
NO_OVERWRITE = False

# Both output folders must exist before any file is copied or moved.
for _out_dir in (OUT_DRONE, OUT_NOISE):
    Path(_out_dir).mkdir(parents=True, exist_ok=True)

# Movement label (lower-case) -> short code embedded in output filenames.
# NOTE(review): "right" and "rotation" both map to "R", so the two movements
# cannot be told apart from the filename alone — confirm this is intended.
MOVEMENT_MAP = {
    "static": "S",
    "hover": "H",
    "forward": "F",
    "backward": "B",
    "left": "L",
    "right": "R",
    "circle": "C",
    "rotation": "R",
}


def movement_abbrev(m: str) -> str:
    """Return the short code for movement label *m*.

    The label is stripped and lower-cased before the lookup in
    ``MOVEMENT_MAP``. A label that is not in the map is abbreviated to its
    upper-cased first character; an empty, blank, or ``None`` label
    yields ``"U"``.
    """
    key = m.strip().lower() if m else ""
    if key in MOVEMENT_MAP:
        return MOVEMENT_MAP[key]
    if not key:
        return "U"
    return key[0].upper()

def safe(text: str) -> str:
    """Return *text* reduced to characters that are safe in a filename.

    Surrounding whitespace is stripped, each remaining space becomes an
    underscore, and every character other than ASCII letters, digits,
    ``_``, ``.`` and ``-`` is dropped. A falsy value (``None``, ``""``)
    yields an empty string.
    """
    trimmed = text.strip() if text else ""
    underscored = "_".join(trimmed.split(" "))
    # Keep only the whitelisted characters, in their original order.
    return "".join(re.findall(r"[A-Za-z0-9_.-]", underscored))

def find_meta(d):
    """Return the first dict containing a ``"sound_source"`` key, or ``None``.

    *d* is searched depth-first: a dict that has the key is returned
    immediately; otherwise the values of a dict, or the items of a list, are
    searched in order. Any other kind of value cannot hold metadata and
    yields ``None``.
    """
    if isinstance(d, dict):
        if "sound_source" in d:
            return d
        children = d.values()
    elif isinstance(d, list):
        children = d
    else:
        return None

    for child in children:
        found = find_meta(child)
        if found is not None:
            return found
    return None

def unique_path(dest_dir: str, base_name: str) -> str:
    """Return a path inside *dest_dir* that does not exist yet.

    ``dest_dir/base_name`` is returned when nothing exists at that path.
    Otherwise ``_v2``, ``_v3``, ... is inserted between the stem and the
    suffix of *base_name* until an unused name is found.
    """
    target_dir = Path(dest_dir)
    name = Path(base_name)
    candidate = target_dir / base_name
    version = 1
    while candidate.exists():
        # The first alternative is _v2: the unsuffixed name counts as v1.
        version += 1
        candidate = target_dir / f"{name.stem}_v{version}{name.suffix}"
    return str(candidate)

# Collect every folder under SRC_ROOT that holds both a recording
# ("output.wav") and its label file ("label.json").
pairs = [
    root
    for root, _, files in os.walk(SRC_ROOT)
    if "output.wav" in files and "label.json" in files
]

print(f"Found {len(pairs)} folders to process.\n")

# Running index per noise source name; it becomes part of the noise filename.
noise_counters = defaultdict(int)
processed = drone_count = noise_count = skipped = errors = 0

for folder in tqdm(pairs, desc="Processing", unit="folder"):
    wav_path = os.path.join(folder, "output.wav")
    json_path = os.path.join(folder, "label.json")
    folder_id = Path(folder).name  # e.g., 20241115_093611

    try:
        # A file may have disappeared since the walk above; count and move on.
        if not (os.path.exists(wav_path) and os.path.exists(json_path)):
            skipped += 1
            continue

        with open(json_path, "r", encoding="utf-8") as f:
            meta = find_meta(json.load(f)) or {}

        sound_source = (meta.get("sound_source") or "").strip()
        is_drone = sound_source.lower() == "drone"

        if is_drone:
            # Coerce to str so a numeric "type" cannot crash safe(), and fall
            # back to the default when the key is present but null or empty.
            dtype = safe(str(meta.get("type") or "UnknownDrone"))
            mov = movement_abbrev(meta.get("movement", ""))
            dist = safe(str(meta.get("distance", "NA")))
            height = safe(str(meta.get("height", "NA")))
            az = safe(str(meta.get("azimuth", "NA")))

            base_name = f"{dtype}_{mov}_d{dist}_h{height}_a{az}_{folder_id}.wav"
            dest = unique_path(OUT_DRONE, base_name)
        else:
            base = safe(sound_source or "UnknownSource")
            # Tentative index; committed only once the transfer has succeeded.
            idx = noise_counters[base] + 1
            base_name = f"{base}_{idx}_{folder_id}.wav"
            dest = unique_path(OUT_NOISE, base_name)

        if KEEP_ORIGINAL:
            shutil.copy2(wav_path, dest)
        else:
            shutil.move(wav_path, dest)

        # Update the tallies only after the file really landed, so a failed
        # copy/move is reported as an error and nothing else.
        processed += 1
        if is_drone:
            drone_count += 1
        else:
            noise_counters[base] = idx
            noise_count += 1

    except Exception as e:
        # Per-folder boundary: report the failure and keep processing the rest.
        errors += 1
        print(f"Error in {folder}: {e}")

print("\n==== Summary ====")
print(f"Processed files   : {processed}")
print(f" - Drone files    : {drone_count}  -> {OUT_DRONE}")
print(f" - No-drone files : {noise_count}  -> {OUT_NOISE}")
print(f"Skipped (missing) : {skipped}")
print(f"Errors            : {errors}")


Found 132 folders to process.



Processing: 100%|██████████| 132/132 [06:50<00:00,  3.11s/folder]


==== Summary ====
Processed files   : 132
 - Drone files    : 128  -> D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Drone Audio
 - No-drone files : 4  -> D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Noise Audio
Skipped (missing) : 0
Errors            : 0



