# Installing libraries

In [None]:
!pip install librosa matplotlib pandas tqdm




# Importing Libraries

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import pandas as pd
from imblearn.over_sampling import RandomOverSampler

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the dataset folder and
# the .npz archives referenced below under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Spectrogram Creation and Processing

In [None]:
def wav_to_spectro(path, sr=24000, n_mels=128, fmax=8000, secs=4):
    """Turn an audio file into a fixed-length log-mel spectrogram.

    At most ``secs`` seconds are read from ``path`` at the requested sample
    rate. The waveform is then zero-padded at its end, or cut, so that it holds
    exactly ``int(rate * secs)`` samples, where ``rate`` is the sample rate
    reported back by ``librosa.load``.

    Args:
        path: Audio file location, handed straight to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load``.
        n_mels: Number of mel bands in the spectrogram.
        fmax: Upper frequency bound passed to the mel spectrogram.
        secs: Clip length in seconds used for both loading and padding/cutting.

    Returns:
        The mel power spectrogram converted to decibels, using the
        spectrogram's own maximum as the reference (``ref=np.max``).
    """
    waveform, rate = librosa.load(path, sr=sr, duration=secs)

    n_target = int(rate * secs)
    n_missing = n_target - len(waveform)

    if n_missing > 0:
        # Too short: append trailing zeros up to the target length.
        waveform = np.pad(waveform, (0, n_missing))
    else:
        # Long enough: keep only the first n_target samples.
        waveform = waveform[:n_target]

    mel_power = librosa.feature.melspectrogram(
        y=waveform, sr=rate, n_mels=n_mels, fmax=fmax
    )
    return librosa.power_to_db(mel_power, ref=np.max)


def urban8k_process(root, out_file="urbansound8k_spectrograms.npz"):
    """Convert every clip listed in the UrbanSound8K metadata to a spectrogram.

    Reads ``<root>/metadata/UrbanSound8K.csv`` and, for each row, runs
    ``wav_to_spectro`` on ``<root>/audio/fold<fold>/<slice_file_name>``.
    The results are written to ``out_file`` as a compressed ``.npz`` archive
    with the keys ``spectrograms``, ``labels``, ``filenames`` and ``folds``.

    Args:
        root: Dataset directory containing the ``metadata`` and ``audio`` folders.
        out_file: Destination handed to ``np.savez_compressed``.

    Returns:
        Tuple ``(spectrograms, labels, filenames, folds)`` of NumPy arrays, in
        metadata row order.
    """
    dataset_dir = Path(root)
    metadata = pd.read_csv(dataset_dir / "metadata" / "UrbanSound8K.csv")
    audio_dir = dataset_dir / "audio"

    mel_list = []
    class_ids = []
    clip_names = []
    fold_ids = []

    progress = tqdm(metadata.itertuples(index=False), total=len(metadata))
    for entry in progress:
        # Clips live in per-fold sub-directories: audio/fold<k>/<file>.
        clip_path = audio_dir / f"fold{entry.fold}" / entry.slice_file_name

        mel_list.append(wav_to_spectro(str(clip_path)))
        class_ids.append(entry.classID)
        clip_names.append(entry.slice_file_name)
        fold_ids.append(entry.fold)

    # Insertion order of this dict defines both the archive keys and the
    # order of the returned tuple.
    arrays = {
        "spectrograms": np.array(mel_list),
        "labels": np.array(class_ids),
        "filenames": np.array(clip_names),
        "folds": np.array(fold_ids),
    }
    np.savez_compressed(out_file, **arrays)

    return tuple(arrays.values())

In [None]:
# Write the archive to Drive explicitly: the class-balancing cell loads
# '/content/drive/MyDrive/urbansound8k_spectrograms.npz', whereas the default
# out_file is a relative path that would land in the current working directory.
spectrograms, labels, filenames, folds = urban8k_process(
    '/content/drive/MyDrive/UrbanSound8K',
    out_file='/content/drive/MyDrive/urbansound8k_spectrograms.npz',
)

# Tackling class imbalance

In [None]:
# Load the archive written by urban8k_process (keys: spectrograms, labels,
# filenames, folds).
data = np.load('/content/drive/MyDrive/urbansound8k_spectrograms.npz')
X = data['spectrograms']
y = data['labels']
folds_unbalanced = data['folds']
filenames_unbalanced = data['filenames']

class_names = [
    'air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
    'drilling', 'engine_idling', 'gun_shot', 'jackhammer',
    'siren', 'street_music'
]


def _print_class_distribution(title, label_array):
    """Print ``title`` followed by one aligned ``name: count`` line per class."""
    print(title)
    for class_id, name in enumerate(class_names):
        count = np.sum(label_array == class_id)
        print(f"{name:20s}: {count:4d}")


_print_class_distribution("Unbalanced distribution:", y)

# RandomOverSampler expects 2-D input, so flatten each spectrogram for
# resampling and restore the original per-clip shape afterwards.
X_flat = X.reshape(X.shape[0], -1)
ros = RandomOverSampler(random_state=42)
X_balanced, y_balanced = ros.fit_resample(X_flat, y)

X_balanced = X_balanced.reshape(-1, X.shape[1], X.shape[2])

# sample_indices_ maps every resampled row back to its source clip; use it to
# carry fold and filename along so duplicated clips stay traceable.
balanced_idx = ros.sample_indices_
folds_balanced = folds_unbalanced[balanced_idx]
filenames_balanced = filenames_unbalanced[balanced_idx]
assert len(folds_balanced) == len(y_balanced) == len(X_balanced)

_print_class_distribution("Balanced distribution:", y_balanced)

# NOTE: oversampling duplicates clips. Split by `folds` (or oversample only the
# training folds) before training; a random split of this archive can put
# copies of the same clip on both sides of the split.
np.savez_compressed('/content/drive/MyDrive/urbansound8k_spectrograms_balanced.npz',
                   spectrograms=X_balanced,
                   labels=y_balanced,
                   folds=folds_balanced,
                   filenames=filenames_balanced)

Unbalanced distribution:
air_conditioner     : 1000
car_horn            :  429
children_playing    : 1000
dog_bark            : 1000
drilling            : 1000
engine_idling       : 1000
gun_shot            :  374
jackhammer          : 1000
siren               :  929
street_music        : 1000
Balanced distribution:
air_conditioner     : 1000
car_horn            : 1000
children_playing    : 1000
dog_bark            : 1000
drilling            : 1000
engine_idling       : 1000
gun_shot            : 1000
jackhammer          : 1000
siren               : 1000
street_music        : 1000
