# Dataset Raw

In [2]:
import os
import pandas as pd
import librosa
import numpy as np
from tqdm import tqdm
from PIL import Image

# === CONFIGURATION ===
# Paths are relative to the current working directory.
CSV_PATH = "data/ESC-50-master/meta/esc50.csv"  # metadata table read into `df` below
AUDIO_DIR = "data/ESC-50-master/audio"  # directory holding the source audio files
OUTPUT_DIR = "data/spectrograms/base"  # root directory for the generated images
SR = 22050  # sampling rate passed to librosa.load in wav_to_spectrogram
IMG_SIZE = (224, 224)  # size passed to PIL's Image.resize for every output image

# Create the base output directory (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the metadata; the conversion loop reads its "filename" and "category" columns.
df = pd.read_csv(CSV_PATH)

def wav_to_spectrogram(wav_path, save_path):
    """Render an audio file as a grayscale mel-spectrogram image.

    The audio is loaded at ``SR`` Hz, converted to a 128-band mel power
    spectrogram, expressed in dB relative to its own peak, min-max scaled
    to 0-255, resized to ``IMG_SIZE`` and written to ``save_path``.

    Args:
        wav_path: Path of the audio file to read.
        save_path: Path of the image file to write; the image format is
            chosen by PIL from the file extension.

    Returns:
        None. Any failure is reported on stdout and swallowed on purpose so
        that one bad file does not abort a batch conversion.
    """
    try:
        y, sr = librosa.load(wav_path, sr=SR)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        S_db = librosa.power_to_db(S, ref=np.max)

        # Min-max normalization to [0, 1].
        db_min = S_db.min()
        db_range = S_db.max() - db_min
        if db_range > 0:
            S_norm = (S_db - db_min) / db_range
        else:
            # A constant spectrogram (e.g. digital silence) has zero range;
            # dividing would yield NaN and an undefined uint8 cast, so emit
            # an all-black image instead.
            S_norm = np.zeros_like(S_db)

        # Scale to 0-255 and save as a single-channel grayscale image.
        S_img = (S_norm * 255).astype(np.uint8)
        img = Image.fromarray(S_img).resize(IMG_SIZE).convert("L")
        img.save(save_path)
    except Exception as e:
        # Deliberate best-effort: log and continue with the next file.
        print(f"Error procesando {wav_path}: {e}")

# === CONVERSION ===
# For every metadata row, write the spectrogram image to
# OUTPUT_DIR/<category>/<audio file stem>.png.
for _, row in tqdm(df.iterrows(), total=len(df)):
    file_name = row["filename"]
    label = row["category"]

    # One sub-directory per class label.
    class_dir = os.path.join(OUTPUT_DIR, label)
    os.makedirs(class_dir, exist_ok=True)

    wav_path = os.path.join(AUDIO_DIR, file_name)
    # Swap only the trailing extension: str.replace would also rewrite a
    # ".wav" occurring mid-name and would miss variants such as ".WAV".
    stem, _ = os.path.splitext(file_name)
    save_path = os.path.join(class_dir, stem + ".png")

    wav_to_spectrogram(wav_path, save_path)


100%|██████████| 2000/2000 [00:35<00:00, 56.66it/s]
