In [1]:
# rename_and_collect_uavirbase.py
import os
import json
import shutil
import re
from collections import defaultdict
from pathlib import Path


In [2]:
# ---------- CONFIGURE PATHS ----------
# Each location may be overridden with an environment variable so the notebook
# can run on another machine without editing this cell. When a variable is not
# set, the original hard-coded location is used.
#   UAVIRBASE_SRC_ROOT  - folder tree walked for output.wav / label.json pairs
#   UAVIRBASE_OUT_DRONE - destination folder for renamed drone recordings
#   UAVIRBASE_OUT_NOISE - destination folder for renamed non-drone recordings
SRC_ROOT = os.environ.get(
    "UAVIRBASE_SRC_ROOT",
    r"D:\FYP\Datasets\Drone Datasets\UAVirBase Public Access Drone Audio",
)
OUT_DRONE = os.environ.get(
    "UAVIRBASE_OUT_DRONE",
    r"D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Drone Audio",
)
OUT_NOISE = os.environ.get(
    "UAVIRBASE_OUT_NOISE",
    r"D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Noise Audio",
)
# ------------------------------------

In [3]:

# Make sure both destination folders exist before any file is copied.
for _out_dir in (OUT_DRONE, OUT_NOISE):
    Path(_out_dir).mkdir(parents=True, exist_ok=True)

# Movement → 1-letter code (extend if needed)
MOVEMENT_MAP = dict(
    static="S",
    hover="H",
    forward="F",
    backward="B",
    left="L",
    right="R",
    circle="C",
    rotation="R",  # if movement accidentally set to "Rotation"; same code as "right"
)

In [4]:

def movement_abbrev(m: str) -> str:
    """Return the one-letter code for a movement label.

    The label is trimmed and lower-cased, then looked up in MOVEMENT_MAP.
    Labels missing from the map fall back to their own first letter in
    upper case. An empty, whitespace-only or falsy label yields "U" (unknown).
    """
    if not m:
        return "U"  # Unknown

    label = m.strip().lower()
    fallback = label[:1].upper() if label else "U"
    return MOVEMENT_MAP.get(label, fallback)

def safe(text: str) -> str:
    """Make a value usable as part of a Windows file name.

    Surrounding whitespace is trimmed, remaining spaces become underscores and
    every character other than ASCII letters, digits, ``_``, ``-`` and ``.``
    is dropped.

    Values that are not strings (for example numbers decoded from JSON) are
    converted with ``str()`` first, and ``None`` is treated as an empty
    string, so metadata values can be passed in without pre-converting them.

    Returns:
        The sanitised text. It may be empty when nothing allowed remains.
    """
    if text is None:
        return ""
    cleaned = str(text).strip().replace(" ", "_")
    # keep letters, numbers, _, -, and .
    return re.sub(r"[^A-Za-z0-9_.-]", "", cleaned)

def find_meta(d):
    """Locate the first dict that has a 'sound_source' key.

    The search is depth-first through nested dicts (by value, in insertion
    order) and lists (in element order). Any other type is treated as a leaf.

    Returns:
        The matching dict itself (not a copy), or None when nothing matches.
    """
    if isinstance(d, dict):
        if "sound_source" in d:
            return d
        children = d.values()
    elif isinstance(d, list):
        children = d
    else:
        return None

    for child in children:
        hit = find_meta(child)
        if hit is not None:
            return hit
    return None

# Running index per non-drone source name (missing names start at 0)
noise_counters = defaultdict(int)

# Run statistics reported in the final summary
processed = drone_count = noise_count = skipped = errors = 0

In [5]:
# Iterate subfolders that contain output.wav + label.json
#
# The tallies are (re)initialised here so that re-running this cell on its own
# starts from zero instead of adding to the numbers left by a previous run.
noise_counters = defaultdict(int)
processed = drone_count = noise_count = skipped = errors = 0

# Lower-cased destination paths already written during this run. Two
# recordings with identical metadata would otherwise get the same file name and
# shutil.copy2 would silently replace the first copy with the second.
# Lower-casing also catches names that differ only by case, which Windows
# treats as the same file.
used_dests = set()


def _unique_dest(out_dir, fname):
    """Return a path in out_dir that no earlier copy in this run has claimed.

    The first claimant keeps fname unchanged; later ones get "_2", "_3", ...
    inserted before the extension. The chosen path is recorded in used_dests.
    """
    stem, ext = os.path.splitext(fname)
    candidate = os.path.join(out_dir, fname)
    n = 1
    while candidate.lower() in used_dests:
        n += 1
        candidate = os.path.join(out_dir, f"{stem}_{n}{ext}")
    used_dests.add(candidate.lower())
    return candidate


for root, dirs, files in os.walk(SRC_ROOT):
    # Sorting in place fixes the traversal order, so noise indices and
    # duplicate suffixes come out the same on every run.
    dirs.sort()

    if "output.wav" not in files or "label.json" not in files:
        continue

    wav_path = os.path.join(root, "output.wav")
    json_path = os.path.join(root, "label.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        meta = find_meta(data) or {}
        # str() guards against non-string JSON values (numbers, booleans).
        sound_source = str(meta.get("sound_source") or "").strip()

        if not os.path.exists(wav_path):
            skipped += 1
            print(f"⚠️  Skipping (no wav): {wav_path}")
            continue

        if sound_source.lower() == "drone":
            # Build drone filename
            type_ = str(meta.get("type") or "UnknownDrone")
            mv = movement_abbrev(str(meta.get("movement") or ""))
            dist = safe(str(meta.get("distance", "NA")))
            height = safe(str(meta.get("height", "NA")))
            az = safe(str(meta.get("azimuth", "NA")))

            fname = f"{safe(type_)}_{mv}_d{dist}_h{height}_a{az}.wav"
            dest = _unique_dest(OUT_DRONE, fname)
            if os.path.basename(dest) != fname:
                print(f"⚠️  Duplicate name {fname}; saved as {os.path.basename(dest)}")
            shutil.copy2(wav_path, dest)
            drone_count += 1
        else:
            # Non-drone: name + running index. The counter key is lower-cased
            # so "Car" and "car" share one index sequence and cannot both
            # produce "<name>_1.wav".
            base = safe(sound_source or "UnknownSource")
            counter_key = base.lower()
            noise_counters[counter_key] += 1
            idx = noise_counters[counter_key]
            fname = f"{base}_{idx}.wav"
            dest = _unique_dest(OUT_NOISE, fname)
            shutil.copy2(wav_path, dest)
            noise_count += 1

        processed += 1

    except Exception as e:
        # Best effort: one unreadable folder must not abort the whole run.
        errors += 1
        print(f"❌ Error in {root}: {e}")

print("\n==== Summary ====")
print(f"Processed folders : {processed}")
print(f" - Drone files    : {drone_count}  -> {OUT_DRONE}")
print(f" - No-drone files : {noise_count}  -> {OUT_NOISE}")
print(f"Skipped (missing) : {skipped}")
print(f"Errors            : {errors}")



==== Summary ====
Processed folders : 132
 - Drone files    : 128  -> D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Drone Audio
 - No-drone files : 4  -> D:\FYP\Datasets\Drone Datasets\UAVirBase Extracted Noise Audio
Skipped (missing) : 0
Errors            : 0
