In [1]:
# modules
import os
import soundata
import numpy as np
import pandas as pd
import soundfile as sf
import matplotlib.pyplot as plt
from IPython.display import Audio

# pipeline
from utils import SpatialProcessor

#### Using UrbanSound8k Dataset and Soundata for data validation
- classID - A numeric identifier of the sound class:
    - 0 = air_conditioner
    - 1 = car_horn
    - 2 = children_playing
    - 3 = dog_bark
    - 4 = drilling
    - 5 = engine_idling
    - 6 = gun_shot
    - 7 = jackhammer
    - 8 = siren
    - 9 = street_music

#### For this project the siren label taxonomy will be used.

In [2]:
# available datasets in soundata 
for dataset in soundata.list_datasets():
    print(dataset)

dcase23_task2
dcase23_task4b
dcase23_task6a
dcase23_task6b
dcase_bioacoustic
dcase_birdVox20k
eigenscape
eigenscape_raw
esc50
freefield1010
fsd50k
fsdnoisy18k
marco
singapura
starss2022
tau2019sse
tau2019uas
tau2020sse_nigens
tau2020uas_mobile
tau2021sse_nigens
tau2022uas_mobile
tut2017se
urbansed
urbansound8k
warblrb10k


In [10]:
dataset = soundata.initialize('urbansound8k', data_home='/Users/calodii/Desktop/stuff/home/threed-audio/final/aural-alert')
# dataset.download()
# dataset.validate()

# metadata
# metadata = pd.read_csv(dataset.metadata_path)

# Dataset Development
#### Spatialization Pipeline
- Extract "siren" class clips form the UrbanSound8k datasets and normalize sample rates to 48kHz

#### Metadata Embedding
- Use metadata fields for:
    - Urgency Levels (0-5 scale)
    - Proximity (meters/dBSPL reference)
    - Spatial Position (azimuth/elevation (degrees))

#### Spatial Capture Simulation
- Proces mno files through Ambisonic encoders (Pro Tools)
- Apply synthesized room acoustics using REAKTOR BRIR Generator for:
    - Hallways (long RT60)
    - Office Rooms (short RT60)

# 3D Audio Implementation
#### HRTF Spatialization
- Near-field HRTF compensation (<1m sources>)
Dynamic ITD/ILD adjustment based on urgency (e.g. +15% ILD for critical signals)
- Map urgency to spectral brightness
- Directional encoding: $0^{\circ}$=fire, $120^{\circ}$=gas leak, $240^{\circ}$=evacuation route

# VBAP Implementation
- Using Faust's VBAP library increase spread range from $15^{\circ}$(normal) to $45^{\circ}$(urgent).

In [11]:
# 3d vbap setup (8 speakers in cube configuration)
positions = [
    [1,1,1], [-1,1,1], [-1,-1,1], [1,-1,1],
    [1,1,-1], [-1,1,-1], [-1,-1,-1], [1,-1,-1]
]

In [12]:
# Filter for siren sounds (class_id = 8)
siren_metadata = metadata[metadata['classID'] == 8]

# Function to load an audio file
def load_audio(file_path):
    audio_path = os.path.join(dataset.data_home, file_path)
    return sf.read(audio_path)

# Load first siren clip as example
first_siren = siren_metadata.iloc[0]
audio_path = os.path.join('fold' + str(first_siren['fold']), first_siren['slice_file_name'])
audio_data, sample_rate = load_audio(audio_path)

# Play the audio
Audio(data=audio_data, rate=sample_rate)

# Print clip information
print(f"Clip ID: {first_siren['slice_file_name']}")
print(f"Sample Rate: {sample_rate} Hz")
print(f"Duration: {len(audio_data)/sample_rate:.2f} seconds")
print(f"Fold: {first_siren['fold']}")
print(f"Class: {first_siren['class']}")

LibsndfileError: Error opening '/Users/calodii/Desktop/stuff/home/threed-audio/final/aural-alert/fold7/102853-8-0-0.wav': System error.

In [None]:
# initialize spatial processor
processor = SpatialProcessor(sample_rate=48000)

# metadata
metadata = pd.read_csv('metadata/UrbanSound8K.csv')
siren_files = metadata[metadata['class'] == 'siren']

# Process emergency signals
def process_emergency_signal(audio_path: str, 
                           urgency: float,
                           azimuth: float,
                           room_type: str = 'office') -> np.ndarray:
    """Process a single emergency signal with spatial audio effects"""
    
    # Load audio file
    audio, sr = sf.read(audio_path)
    if sr != processor.sample_rate:
        # Resample to target rate
        audio = signal.resample(audio, int(len(audio) * processor.sample_rate / sr))
    
    # Convert to mono if stereo
    if len(audio.shape) > 1:
        audio = audio.mean(axis=1)
    
    # Load HRTF and BRIR data (you'll need to provide these)
    hrtf = np.load('hrtf_database.npy')  # Replace with your HRTF database
    brir = np.load('brir_database.npy')  # Replace with your BRIR database
    
    # Apply spatial processing chain
    spatialized = processor.process_hrtf(audio, hrtf, azimuth, 0)
    urgent = processor.apply_urgency(spatialized, urgency)
    final = processor.apply_brir(urgent, brir, room_type)
    
    return final

# Example usage
audio_path = 'audio/fold1/4918-3-0-0.wav'  # Replace with actual siren file
result = process_emergency_signal(
    audio_path,
    urgency=0.8,  # High urgency
    azimuth=0,    # Front direction (fire alarm)
    room_type='office'
)

# Play the result
from IPython.display import Audio
Audio(result.T, rate=processor.sample_rate)

FileNotFoundError: [Errno 2] No such file or directory: 'hrtf_database.npy'