In [19]:
from datasets import load_dataset
import numpy as np
from tqdm.notebook import tqdm
from collections import Counter, defaultdict

In [4]:
# Fetch UrbanSound8K by its repository id; only the "train" split is requested.
dataset = load_dataset(path="danavery/urbansound8K", split="train")

In [17]:
# Show the dataset summary (feature names and number of rows).
dataset

Dataset({
    features: ['audio', 'slice_file_name', 'fsID', 'start', 'end', 'salience', 'fold', 'classID', 'class'],
    num_rows: 8732
})

In [18]:
# Single pass over the dataset that gathers per-clip statistics
# (durations, sampling rates, salience / fold / class tallies) and prints a report.

# Prepare containers
lengths = []                                   # clip duration in seconds, one entry per row
sampling_rates = set()                         # distinct sampling rates encountered
salience_counts = Counter()                    # salience value -> number of clips
fold_counts = Counter()                        # fold number -> number of clips
class_counts = Counter()                       # class name -> number of clips
class_salience_counts = defaultdict(Counter)   # class name -> Counter of salience values
first_part_ids = []                            # text before the first '-' of each slice file name

for row in tqdm(dataset, total=len(dataset)):
    audio = row['audio']
    sampling_rate = audio['sampling_rate']

    # Duration = number of samples / samples per second.
    # NOTE(review): assumes audio['array'] is a 1-D array of samples -- confirm.
    length = len(audio['array']) / sampling_rate
    lengths.append(length)
    sampling_rates.add(sampling_rate)

    salience_counts[row['salience']] += 1
    fold_counts[row['fold']] += 1
    class_counts[row['class']] += 1
    class_salience_counts[row['class']][row['salience']] += 1

    # Take the prefix from the bare file name column instead of audio['path']:
    # a path may be missing or may carry directory components that contain '-',
    # either of which would break the split.
    # NOTE(review): presumably this prefix is the fsID of the clip -- verify.
    first_part = row['slice_file_name'].split('-')[0]
    first_part_ids.append(first_part)

# Length stats (convert once so the four reductions share one array)
length_array = np.asarray(lengths, dtype=float)
min_length = np.min(length_array)
max_length = np.max(length_array)
avg_length = np.mean(length_array)
std_length = np.std(length_array)

print(f"Length stats (seconds): min={min_length:.2f}, max={max_length:.2f}, avg={avg_length:.2f}, std={std_length:.2f}")
print(f"Sampling rates in dataset: {sampling_rates}")
print(f"Salience counts: {dict(salience_counts)}")
print(f"Fold counts: {dict(fold_counts)}")
print(f"Class counts: {dict(class_counts)}")
print("Per class, salience breakdown:")
for cls, sal_counts in class_salience_counts.items():
    # Counter returns 0 for a salience value a class never had, so no KeyError here.
    print(f"  {cls}: salience=1 (foreground): {sal_counts[1]}, salience=2 (background): {sal_counts[2]}")

100%|██████████| 8732/8732 [00:38<00:00, 225.37it/s]


Length stats (seconds): min=0.05, max=4.04, avg=3.61, std=0.97
Sampling rates in dataset: {48000, 96000, 22050, 16000, 44100, 192000, 32000, 24000, 8000, 11024, 11025}
Salience counts: {1: 5702, 2: 3030}
Fold counts: {5: 936, 10: 837, 2: 888, 6: 823, 1: 873, 9: 816, 7: 838, 4: 990, 3: 925, 8: 806}
Class counts: {'dog_bark': 1000, 'children_playing': 1000, 'car_horn': 429, 'air_conditioner': 1000, 'street_music': 1000, 'gun_shot': 374, 'siren': 929, 'engine_idling': 1000, 'jackhammer': 1000, 'drilling': 1000}
Per class, salience breakdown:
  dog_bark: salience=1 (foreground): 645, salience=2 (background): 355
  children_playing: salience=1 (foreground): 588, salience=2 (background): 412
  car_horn: salience=1 (foreground): 153, salience=2 (background): 276
  air_conditioner: salience=1 (foreground): 569, salience=2 (background): 431
  street_music: salience=1 (foreground): 625, salience=2 (background): 375
  gun_shot: salience=1 (foreground): 304, salience=2 (background): 70
  siren: sa