In [None]:
!pip install librosa matplotlib pandas tqdm


In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive so files stored there (the dataset path
# used later lives under /content/drive/MyDrive) are reachable from this runtime.
# NOTE(review): `google.colab` is provided by the Colab runtime; this cell is
# presumably not runnable outside Colab — confirm before running locally.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def audio_to_spectrogram_array(audio_path, sr=22050, n_mels=128, fmax=8000, max_length=174,
                               duration=4.0):
    """Load an audio clip and return its mel spectrogram in decibels.

    The clip is loaded with ``librosa.load`` (at most ``duration`` seconds),
    then truncated or zero-padded at the end to exactly ``sr * duration``
    samples so that every clip produces a spectrogram with the same number of
    time frames. The mel power spectrogram is converted to dB with the clip's
    own maximum as reference (``ref=np.max``).

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate passed to ``librosa.load``; the rate it returns is
            used for all later steps.
        n_mels: Number of mel bands passed to ``melspectrogram``.
        fmax: Upper frequency bound passed to ``melspectrogram``.
        max_length: Currently unused. Accepted only so existing callers that
            pass it keep working; the number of time frames is determined by
            the fixed clip length, not by this value.
        duration: Clip length in seconds that every clip is cut/padded to.
            Defaults to 4.0, the value previously hard-coded.

    Returns:
        The array returned by ``librosa.power_to_db`` for the clip's mel
        spectrogram.

    Raises:
        ValueError: If ``duration`` is not a positive number.
    """
    if duration is None or duration <= 0:
        raise ValueError(f"duration must be a positive number of seconds, got {duration!r}")

    y, sr = librosa.load(audio_path, sr=sr, duration=duration)

    # Force a fixed sample count: short clips are zero-padded at the end,
    # longer ones are truncated, so all spectrograms share one shape.
    target_length = int(round(sr * duration))
    if len(y) < target_length:
        y = np.pad(y, (0, target_length - len(y)), mode='constant')
    else:
        y = y[:target_length]

    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)

    # Log-scale the power spectrogram relative to its own peak.
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    return mel_spec_db

def process_urbansound8k(dataset_path, output_file='urbansound8k_spectrograms.npz'):
    """Convert every clip listed in the UrbanSound8K metadata to a spectrogram.

    Reads ``<dataset_path>/metadata/UrbanSound8K.csv`` and, for each row, looks
    for ``<dataset_path>/audio/fold<fold>/<slice_file_name>``. Files that are
    missing or fail to convert are reported with a printed message and
    skipped. The results are written to ``output_file`` with
    ``np.savez_compressed`` under the keys ``spectrograms``, ``labels``,
    ``filenames`` and ``folds``.

    Args:
        dataset_path: Root directory of the dataset (contains ``audio`` and
            ``metadata``).
        output_file: Destination passed to ``np.savez_compressed``.

    Returns:
        Tuple ``(spectrograms, labels, filenames, folds)`` of NumPy arrays,
        one entry per successfully converted clip, in metadata order.

    Raises:
        ValueError: If no clip could be converted (nothing is saved then).
    """
    dataset_root = Path(dataset_path)
    audio_path = dataset_root / 'audio'
    metadata_path = dataset_root / 'metadata' / 'UrbanSound8K.csv'

    metadata = pd.read_csv(metadata_path)

    spectrograms = []
    labels = []
    filenames = []
    folds = []
    skipped = 0

    print(f"Audio file count: {len(metadata)}")

    # Iterate over just the columns that are needed; this fails fast with a
    # KeyError if the metadata lacks one of them.
    rows = zip(metadata['slice_file_name'], metadata['fold'], metadata['classID'])

    for file_name, fold_id, class_id in tqdm(rows, total=len(metadata), desc="Converting"):
        audio_file = audio_path / f"fold{fold_id}" / file_name

        if not audio_file.exists():
            print(f"Warning: {audio_file} not found")
            skipped += 1
            continue

        # Only the conversion is best-effort; keeping the appends outside the
        # try guarantees the four lists always stay aligned.
        try:
            spec = audio_to_spectrogram_array(str(audio_file))
        except Exception as e:
            print(f"Error processing {audio_file}: {e}")
            skipped += 1
            continue

        spectrograms.append(spec)
        labels.append(class_id)
        filenames.append(file_name)
        folds.append(fold_id)

    if skipped:
        print(f"Skipped {skipped} file(s) that were missing or failed to convert")

    if not spectrograms:
        # Previously this surfaced as an opaque IndexError on spectrograms[0].
        raise ValueError(
            f"No audio files could be converted from {audio_path}; "
            "check dataset_path and the metadata CSV"
        )

    spectrograms = np.array(spectrograms)
    labels = np.array(labels)
    filenames = np.array(filenames)
    folds = np.array(folds)

    print(f"\nDataset shape: {spectrograms.shape}")
    print(f"Labels shape: {labels.shape}")
    print(f"Each spectrogram shape: {spectrograms[0].shape}")

    np.savez_compressed(output_file,
                        spectrograms=spectrograms,
                        labels=labels,
                        filenames=filenames,
                        folds=folds)

    print(f"Saved to {output_file}")
    return spectrograms, labels, filenames, folds

def plot_spectrogram_examples(npz_file, num_examples=5, sr=22050, fmax=8000):
    """Plot randomly chosen spectrograms from a saved archive and print stats.

    Loads the ``spectrograms``, ``labels`` and ``filenames`` arrays from
    ``npz_file``, draws up to ``num_examples`` randomly selected spectrograms
    (one subplot each, titled with class name and file name), then prints the
    sample count, spectrogram shape, number of distinct labels and the
    per-class sample counts.

    Args:
        npz_file: Path of an ``.npz`` archive containing the three arrays.
        num_examples: Maximum number of spectrograms to draw. If the archive
            holds fewer samples, only that many subplots are created.
        sr: Sampling rate handed to ``specshow`` for the time axis. The
            default matches the default of ``audio_to_spectrogram_array``.
        fmax: Upper mel frequency handed to ``specshow``. The default matches
            the default of ``audio_to_spectrogram_array``.

    Returns:
        None. The function only plots and prints.
    """
    # np.load on an .npz keeps a file handle open; read the arrays and close it.
    with np.load(npz_file) as data:
        spectrograms = data['spectrograms']
        labels = data['labels']
        filenames = data['filenames']

    # Index in this list == classID stored in `labels`.
    class_names = [
        'air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
        'drilling', 'engine_idling', 'gun_shot', 'jackhammer',
        'siren', 'street_music'
    ]

    # Never request more panels than there are samples (or fewer than zero).
    n_shown = max(0, min(num_examples, len(spectrograms)))

    if n_shown > 0:
        indices = np.random.choice(len(spectrograms), size=n_shown, replace=False)

        # squeeze=False always yields a 2-D axes array, so a single panel
        # needs no special-casing.
        fig, axes = plt.subplots(n_shown, 1, figsize=(12, 3 * n_shown), squeeze=False)

        for ax, idx in zip(axes[:, 0], indices):
            spec = spectrograms[idx]
            label = labels[idx]
            filename = filenames[idx]

            img = librosa.display.specshow(spec, sr=sr, x_axis='time', y_axis='mel',
                                           fmax=fmax, ax=ax, cmap='viridis')
            ax.set_title(f'Class: {class_names[label]} | File: {filename}')
            fig.colorbar(img, ax=ax, format='%+2.0f dB')

        plt.tight_layout()
        plt.show()
    else:
        print("No spectrograms to plot")

    print(f"\nDataset info:")
    print(f"Total samples: {len(spectrograms)}")
    if len(spectrograms) > 0:
        print(f"Spectrogram shape: {spectrograms[0].shape}")
    print(f"Unique classes: {len(np.unique(labels))}")
    print(f"\nClass distribution:")
    for class_id, class_name in enumerate(class_names):
        count = np.sum(labels == class_id)
        print(f"  {class_name}: {count} samples")

In [None]:
# Convert the UrbanSound8K copy on Google Drive to spectrograms. The arrays are
# returned and also written to the function's default .npz output file.
dataset_root = '/content/drive/MyDrive/UrbanSound8K'
spectrograms, labels, filenames, folds = process_urbansound8k(dataset_root)

# Visual sanity check: draw a handful of randomly selected spectrograms.
npz_path = 'urbansound8k_spectrograms.npz'
plot_spectrogram_examples(npz_path, num_examples=5)

# Reload the saved archive and pull out features (X) and class labels (y).
data = np.load(npz_path)
X, y = data['spectrograms'], data['labels']
