# Dataset Raw

In [6]:
import os
import pandas as pd
import librosa
import numpy as np
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split

# === CONFIGURACIÓN ===
CSV_PATH = "data/ESC-50-master/meta/esc50.csv"
AUDIO_DIR = "data/ESC-50-master/audio"
OUTPUT_DIR = "data/spectrograms2/base"
SR = 22050
IMG_SIZE = (224, 224)

# Proporciones para split estratificado
TRAIN_RATIO = 0.70  # 70% entrenamiento
VAL_RATIO = 0.15    # 15% validación
TEST_RATIO = 0.15   # 15% test

# Crear carpetas base
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(OUTPUT_DIR, split), exist_ok=True)

# Leer metadatos
df = pd.read_csv(CSV_PATH)

def wav_to_spectrogram(wav_path, save_path):
    try:
        y, sr = librosa.load(wav_path, sr=SR)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        S_db = librosa.power_to_db(S, ref=np.max)

        S_norm = (S_db - S_db.min()) / (S_db.max() - S_db.min())
        S_img = (S_norm * 255).astype(np.uint8)

        img = Image.fromarray(S_img).resize(IMG_SIZE).convert("L")
        img.save(save_path)
    except Exception as e:
        print(f"⚠️ Error procesando {wav_path}: {e}")

def process_split(df_split, split_name):
    for _, row in tqdm(df_split.iterrows(), total=len(df_split), desc=f"Procesando {split_name}"):
        file_name = row["filename"]
        label = row["category"]

        # Crear carpeta por clase
        class_dir = os.path.join(OUTPUT_DIR, split_name, label)
        os.makedirs(class_dir, exist_ok=True)

        wav_path = os.path.join(AUDIO_DIR, file_name)
        save_path = os.path.join(class_dir, file_name.replace(".wav", ".png"))

        if not os.path.exists(save_path):
            wav_to_spectrogram(wav_path, save_path)

# === SPLIT ESTRATIFICADO ===
print(f"Total de muestras: {len(df)}")
print(f"Categorías únicas: {df['category'].nunique()}")
print(f"\nDistribución por categoría:")
print(df['category'].value_counts().sort_index())

# Primera división: train vs (val+test)
train_df, temp_df = train_test_split(
    df, 
    test_size=(VAL_RATIO + TEST_RATIO),
    stratify=df['category'],
    random_state=42
)

# Segunda división: val vs test
val_ratio_adjusted = VAL_RATIO / (VAL_RATIO + TEST_RATIO)
val_df, test_df = train_test_split(
    temp_df,
    test_size=(1 - val_ratio_adjusted),
    stratify=temp_df['category'],
    random_state=42
)

print(f"\n=== Tamaños de splits ===")
print(f"Train: {len(train_df)} ({len(train_df)/len(df)*100:.1f}%)")
print(f"Val:   {len(val_df)} ({len(val_df)/len(df)*100:.1f}%)")
print(f"Test:  {len(test_df)} ({len(test_df)/len(df)*100:.1f}%)")

print(f"\n=== Verificación de estratificación ===")
print("Train categorías:", train_df['category'].nunique())
print("Val categorías:", val_df['category'].nunique())
print("Test categorías:", test_df['category'].nunique())

# Generar los tres splits
process_split(train_df, "train")
process_split(val_df, "val")
process_split(test_df, "test")

print("\n✅ Espectrogramas generados con split estratificado.")
print("✅ Todas las categorías están representadas en cada conjunto.")


Total de muestras: 2000
Categorías únicas: 50

Distribución por categoría:
category
airplane            40
breathing           40
brushing_teeth      40
can_opening         40
car_horn            40
cat                 40
chainsaw            40
chirping_birds      40
church_bells        40
clapping            40
clock_alarm         40
clock_tick          40
coughing            40
cow                 40
crackling_fire      40
crickets            40
crow                40
crying_baby         40
dog                 40
door_wood_creaks    40
door_wood_knock     40
drinking_sipping    40
engine              40
fireworks           40
footsteps           40
frog                40
glass_breaking      40
hand_saw            40
helicopter          40
hen                 40
insects             40
keyboard_typing     40
laughing            40
mouse_click         40
pig                 40
pouring_water       40
rain                40
rooster             40
sea_waves           40
sheep              

Procesando train: 100%|██████████| 1400/1400 [00:16<00:00, 86.86it/s]
Procesando val: 100%|██████████| 300/300 [00:03<00:00, 84.22it/s]
Procesando test: 100%|██████████| 300/300 [00:03<00:00, 82.71it/s]


✅ Espectrogramas generados con split estratificado.
✅ Todas las categorías están representadas en cada conjunto.





# Modelo A

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet5(nn.Module):
    def __init__(self, num_classes=50, dropout=0.4):
        super(LeNet5, self).__init__()

        # --- Bloque 1: Más filtros para capturar características complejas ---
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(2, 2)

        # --- Bloque 2: Mayor capacidad ---
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(2, 2)

        # --- Bloque 3: Profundidad adicional ---
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        # Dropout espacial
        self.drop_conv = nn.Dropout2d(0.3)

        # ✅ CALCULAR DIMENSIONES DINÁMICAMENTE
        self._to_linear = None
        self._get_conv_output_size((1, 224, 224))

        # --- Capas densas con regularización fuerte ---
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        
        # Dropout más agresivo
        self.dropout1 = nn.Dropout(p=0.5)
        self.dropout2 = nn.Dropout(p=0.4)

        # Inicialización Xavier para tanh
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def _get_conv_output_size(self, shape):
        """Calcula automáticamente el tamaño de salida de las capas conv."""
        with torch.no_grad():
            dummy_input = torch.zeros(1, *shape)
            x = self.pool1(torch.tanh(self.bn1(self.conv1(dummy_input))))
            x = self.pool2(torch.tanh(self.bn2(self.conv2(x))))
            x = self.pool3(torch.tanh(self.bn3(self.conv3(x))))
            self._to_linear = x.view(1, -1).shape[1]
            print(f"✅ Tamaño calculado para FC: {self._to_linear}")

    def forward(self, x):
        # Bloques convolucionales
        x = self.pool1(torch.tanh(self.bn1(self.conv1(x))))
        x = self.pool2(torch.tanh(self.bn2(self.conv2(x))))
        x = self.pool3(torch.tanh(self.bn3(self.conv3(x))))
        x = self.drop_conv(x)
        
        # Aplanar
        x = x.view(x.size(0), -1)
        
        # Capas densas con dropout fuerte
        x = torch.tanh(self.fc1(x))
        x = self.dropout1(x)
        x = torch.tanh(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

## Entrenamiento Dataset Raw Modelo A

In [2]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import precision_recall_fscore_support

# ==============================
# CONFIGURACIÓN
# ==============================
DATA_DIR = "data/spectrograms2/base"
IMG_SIZE = (224, 224)
EPOCHS = 40
PATIENCE = 6

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==============================
# TRANSFORMACIONES
# ==============================
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_data = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
val_data   = datasets.ImageFolder(f"{DATA_DIR}/val", transform=transform)

class_names = train_data.classes

# ==============================
# EXPERIMENTOS
# ==============================
experiments = [
    {"optimizer": "SGD", "lr": 0.001, "batch_size": 32, "weight_decay": 5e-5},  # Reducir weight_decay
    {"optimizer": "SGD", "lr": 0.0005, "batch_size": 32, "weight_decay": 5e-5},
    {"optimizer": "SGD", "lr": 0.001, "batch_size": 64, "weight_decay": 5e-5},
    {"optimizer": "SGD",  "lr": 0.01,  "batch_size": 32, "weight_decay": 5e-5},
    {"optimizer": "SGD",  "lr": 0.001, "batch_size": 16, "weight_decay": 5e-5},
]

# ==============================
# CICLO DE ENTRENAMIENTO
# ==============================
for i, exp in enumerate(experiments, start=1):
    try:
        wandb.finish()  # Cerrar cualquier run previo
    except:
        pass
    wandb.init(
        project="esc50-lenet_stratified",
        name=f"run_{i}_opt-{exp['optimizer']}_lr-{exp['lr']}_bs-{exp['batch_size']}",
        config=exp
    )
    config = wandb.config

    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
    val_loader   = DataLoader(val_data, batch_size=config.batch_size)

    model = LeNet5(num_classes=len(train_data.classes)).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.05)  # Reducir label smoothing

    if config.optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=config.weight_decay)

    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)

    best_val_acc = 0.0
    patience_counter = 0

    for epoch in range(EPOCHS):
        # === ENTRENAMIENTO ===
        model.train()
        running_loss, correct, total = 0, 0, 0
        
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

        train_acc = correct / total
        train_loss = running_loss / len(train_loader)

        # === VALIDACIÓN ===
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        val_y_true, val_y_pred = [], []
        
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()
                
                val_y_true.extend(labels.cpu().tolist())
                val_y_pred.extend(preds.cpu().tolist())

        val_acc = val_correct / val_total
        val_loss /= len(val_loader)
        
        # === MÉTRICAS ADICIONALES ===
        prec_m, rec_m, f1_m, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="macro", zero_division=0
        )
        _, _, f1_w, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="weighted", zero_division=0
        )
        
        scheduler.step()

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1_macro": f1_m,
            "val_f1_weighted": f1_w,
            "val_precision_macro": prec_m,
            "val_recall_macro": rec_m,
            "lr": scheduler.get_last_lr()[0],
            "val_confusion_matrix": wandb.plot.confusion_matrix(
                y_true=val_y_true,
                preds=val_y_pred,
                class_names=class_names
            )
        })

        print(f"[Run {i}] Epoch {epoch+1}/{EPOCHS} | "
              f"Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f} | "
              f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # === EARLY STOPPING ===
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), f"models/lenet_best_run{i}.pth")
            wandb.run.summary["best_val_acc"] = best_val_acc
        else:
            patience_counter += 1
            if patience_counter > PATIENCE:
                print(f"[Run {i}] Early stopping triggered at epoch {epoch+1}.")
                break

    print(f"✅ [Run {i}] Mejor Val Acc: {best_val_acc:.3f}")
    wandb.finish()


[34m[1mwandb[0m: Currently logged in as: [33mjavialroro[0m ([33mjavialroro-tecnologico-de-costa-rica[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mjavialroro[0m ([33mjavialroro-tecnologico-de-costa-rica[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ Tamaño calculado para FC: 43264
[Run 1] Epoch 1/40 | Train Acc: 0.044 | Val Acc: 0.073 | Train Loss: 4.1785 | Val Loss: 3.4956
[Run 1] Epoch 1/40 | Train Acc: 0.044 | Val Acc: 0.073 | Train Loss: 4.1785 | Val Loss: 3.4956
[Run 1] Epoch 2/40 | Train Acc: 0.071 | Val Acc: 0.147 | Train Loss: 3.8950 | Val Loss: 3.3310
[Run 1] Epoch 2/40 | Train Acc: 0.071 | Val Acc: 0.147 | Train Loss: 3.8950 | Val Loss: 3.3310
[Run 1] Epoch 3/40 | Train Acc: 0.096 | Val Acc: 0.147 | Train Loss: 3.6854 | Val Loss: 3.3073
[Run 1] Epoch 3/40 | Train Acc: 0.096 | Val Acc: 0.147 | Train Loss: 3.6854 | Val Loss: 3.3073
[Run 1] Epoch 4/40 | Train Acc: 0.106 | Val Acc: 0.220 | Train Loss: 3.6071 | Val Loss: 3.1705
[Run 1] Epoch 4/40 | Train Acc: 0.106 | Val Acc: 0.220 | Train Loss: 3.6071 | Val Loss: 3.1705
[Run 1] Epoch 5/40 | Train Acc: 0.137 | Val Acc: 0.183 | Train Loss: 3.4642 | Val Loss: 3.2047
[Run 1] Epoch 5/40 | Train Acc: 0.137 | Val Acc: 0.183 | Train Loss: 3.4642 | Val Loss: 3.2047
[Run 1] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▅▆▆▆▆▇▇▇▇▇▇▇█▇██████
train_loss,█▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▄▃▃▅▄▄▅▅▅▆▆▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇██▇███████
val_f1_macro,▁▂▂▃▂▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇██▇███████
val_f1_weighted,▁▂▂▃▂▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇██▇███████
val_loss,█▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▂▃▃▂▃▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁
val_precision_macro,▁▃▂▃▃▄▅▄▄▄▆▅▆▆▅▅▆▆▆▆▇▇▇▇▇▇█▇▇▇██▇███████
val_recall_macro,▁▂▂▄▃▃▅▄▄▅▅▅▆▆▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇██▇███████

0,1
best_val_acc,0.46333
epoch,40
lr,0.00017
train_acc,0.79071
train_loss,1.24278
val_acc,0.44667
val_f1_macro,0.41307
val_f1_weighted,0.41307
val_loss,2.38216
val_precision_macro,0.45397


✅ Tamaño calculado para FC: 43264
[Run 2] Epoch 1/40 | Train Acc: 0.041 | Val Acc: 0.103 | Train Loss: 4.1617 | Val Loss: 3.4721
[Run 2] Epoch 1/40 | Train Acc: 0.041 | Val Acc: 0.103 | Train Loss: 4.1617 | Val Loss: 3.4721
[Run 2] Epoch 2/40 | Train Acc: 0.081 | Val Acc: 0.140 | Train Loss: 3.8083 | Val Loss: 3.2910
[Run 2] Epoch 2/40 | Train Acc: 0.081 | Val Acc: 0.140 | Train Loss: 3.8083 | Val Loss: 3.2910
[Run 2] Epoch 3/40 | Train Acc: 0.095 | Val Acc: 0.197 | Train Loss: 3.6987 | Val Loss: 3.1789
[Run 2] Epoch 3/40 | Train Acc: 0.095 | Val Acc: 0.197 | Train Loss: 3.6987 | Val Loss: 3.1789
[Run 2] Epoch 4/40 | Train Acc: 0.134 | Val Acc: 0.223 | Train Loss: 3.5026 | Val Loss: 3.1260
[Run 2] Epoch 4/40 | Train Acc: 0.134 | Val Acc: 0.223 | Train Loss: 3.5026 | Val Loss: 3.1260
[Run 2] Epoch 5/40 | Train Acc: 0.148 | Val Acc: 0.243 | Train Loss: 3.4111 | Val Loss: 3.0268
[Run 2] Epoch 5/40 | Train Acc: 0.148 | Val Acc: 0.243 | Train Loss: 3.4111 | Val Loss: 3.0268
[Run 2] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁
train_acc,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train_loss,█▇▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_acc,▁▂▃▃▄▄▄▄▅▄▅▅▅▆▆▆▇▆▇▇▇▇▇▇███▇██▇█
val_f1_macro,▁▂▂▃▃▃▄▄▄▄▄▅▅▅▆▆▇▆▇▇▇▇█▇███▇██▇█
val_f1_weighted,▁▂▂▃▃▃▄▄▄▄▄▅▅▅▆▆▇▆▇▇▇▇█▇███▇██▇█
val_loss,█▇▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_precision_macro,▁▁▂▃▃▃▃▄▄▄▄▅▄▅▅▆▇▅▆▇▆▇▇▇█▇█▇▇▇▇█
val_recall_macro,▁▂▃▃▄▄▄▄▅▄▅▅▅▆▆▆▇▆▇▇▇▇▇▇███▇██▇█

0,1
best_val_acc,0.45333
epoch,32
lr,0.00012
train_acc,0.66786
train_loss,1.64244
val_acc,0.44
val_f1_macro,0.41345
val_f1_weighted,0.41345
val_loss,2.3793
val_precision_macro,0.47


✅ Tamaño calculado para FC: 43264
[Run 3] Epoch 1/40 | Train Acc: 0.040 | Val Acc: 0.060 | Train Loss: 4.2723 | Val Loss: 3.5778
[Run 3] Epoch 1/40 | Train Acc: 0.040 | Val Acc: 0.060 | Train Loss: 4.2723 | Val Loss: 3.5778
[Run 3] Epoch 2/40 | Train Acc: 0.078 | Val Acc: 0.123 | Train Loss: 3.8235 | Val Loss: 3.3593
[Run 3] Epoch 2/40 | Train Acc: 0.078 | Val Acc: 0.123 | Train Loss: 3.8235 | Val Loss: 3.3593
[Run 3] Epoch 3/40 | Train Acc: 0.101 | Val Acc: 0.140 | Train Loss: 3.6025 | Val Loss: 3.2522
[Run 3] Epoch 3/40 | Train Acc: 0.101 | Val Acc: 0.140 | Train Loss: 3.6025 | Val Loss: 3.2522
[Run 3] Epoch 4/40 | Train Acc: 0.116 | Val Acc: 0.180 | Train Loss: 3.5556 | Val Loss: 3.1247
[Run 3] Epoch 4/40 | Train Acc: 0.116 | Val Acc: 0.180 | Train Loss: 3.5556 | Val Loss: 3.1247
[Run 3] Epoch 5/40 | Train Acc: 0.150 | Val Acc: 0.217 | Train Loss: 3.3424 | Val Loss: 3.0691
[Run 3] Epoch 5/40 | Train Acc: 0.150 | Val Acc: 0.217 | Train Loss: 3.3424 | Val Loss: 3.0691
[Run 3] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█▇▇█████
train_loss,█▇▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▃▄▃▄▄▅▅▅▅▆▅▅▆▇▇▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇██▇█
val_f1_macro,▁▂▂▃▃▃▃▄▅▄▅▅▆▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇████
val_f1_weighted,▁▂▂▃▃▃▃▄▅▄▅▅▆▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇████
val_loss,█▇▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▂▂▁▁▁▁▁▁▁
val_precision_macro,▁▂▂▃▄▃▄▄▅▅▅▅▆▅▅▆▆▇▇▆▇▇█▇▇▇▇▇▇█▇█▇███████
val_recall_macro,▁▂▂▃▄▃▄▄▅▅▅▅▆▅▅▆▇▇▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇██▇█

0,1
best_val_acc,0.46667
epoch,40
lr,0.00017
train_acc,0.74071
train_loss,1.44332
val_acc,0.46667
val_f1_macro,0.4382
val_f1_weighted,0.4382
val_loss,2.2626
val_precision_macro,0.4449


✅ Tamaño calculado para FC: 43264
[Run 4] Epoch 1/40 | Train Acc: 0.046 | Val Acc: 0.077 | Train Loss: 4.2342 | Val Loss: 3.6647
[Run 4] Epoch 1/40 | Train Acc: 0.046 | Val Acc: 0.077 | Train Loss: 4.2342 | Val Loss: 3.6647
[Run 4] Epoch 2/40 | Train Acc: 0.064 | Val Acc: 0.087 | Train Loss: 3.9872 | Val Loss: 3.5392
[Run 4] Epoch 2/40 | Train Acc: 0.064 | Val Acc: 0.087 | Train Loss: 3.9872 | Val Loss: 3.5392
[Run 4] Epoch 3/40 | Train Acc: 0.074 | Val Acc: 0.087 | Train Loss: 3.8291 | Val Loss: 3.5265
[Run 4] Epoch 3/40 | Train Acc: 0.074 | Val Acc: 0.087 | Train Loss: 3.8291 | Val Loss: 3.5265
[Run 4] Epoch 4/40 | Train Acc: 0.105 | Val Acc: 0.147 | Train Loss: 3.6944 | Val Loss: 3.4429
[Run 4] Epoch 4/40 | Train Acc: 0.105 | Val Acc: 0.147 | Train Loss: 3.6944 | Val Loss: 3.4429
[Run 4] Epoch 5/40 | Train Acc: 0.101 | Val Acc: 0.153 | Train Loss: 3.5926 | Val Loss: 3.3097
[Run 4] Epoch 5/40 | Train Acc: 0.101 | Val Acc: 0.153 | Train Loss: 3.5926 | Val Loss: 3.3097
[Run 4] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
train_loss,█▇▇▇▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁
val_acc,▁▁▁▂▂▃▂▃▃▄▄▄▅▄▄▅▆▆▆▄▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇█▇▇
val_f1_macro,▁▁▁▂▂▃▂▃▃▃▄▄▄▄▄▅▅▆▆▄▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██▇
val_f1_weighted,▁▁▁▂▂▃▂▃▃▃▄▄▄▄▄▅▅▆▆▄▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██▇
val_loss,█▇▇▇▆▆▆▅▅▄▄▅▄▄▄▃▂▃▂▆▂▃▂▂▂▂▂▂▂▁▂▁▂▁▁▂▂▁▁▁
val_precision_macro,▁▁▁▂▃▃▃▃▃▄▄▄▅▄▄▆▅▆▆▅▆▆▇▇▇▇▇▇▇▇█▇▇███▇██▇
val_recall_macro,▁▁▁▂▂▃▂▃▃▄▄▄▅▄▄▅▆▆▆▄▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇█▇▇

0,1
best_val_acc,0.48
epoch,40
lr,0.00168
train_acc,0.67357
train_loss,1.49028
val_acc,0.44667
val_f1_macro,0.42516
val_f1_weighted,0.42516
val_loss,2.38583
val_precision_macro,0.4353


✅ Tamaño calculado para FC: 43264
[Run 5] Epoch 1/40 | Train Acc: 0.046 | Val Acc: 0.067 | Train Loss: 4.1701 | Val Loss: 3.5367
[Run 5] Epoch 1/40 | Train Acc: 0.046 | Val Acc: 0.067 | Train Loss: 4.1701 | Val Loss: 3.5367
[Run 5] Epoch 2/40 | Train Acc: 0.061 | Val Acc: 0.140 | Train Loss: 3.9072 | Val Loss: 3.4141
[Run 5] Epoch 2/40 | Train Acc: 0.061 | Val Acc: 0.140 | Train Loss: 3.9072 | Val Loss: 3.4141
[Run 5] Epoch 3/40 | Train Acc: 0.090 | Val Acc: 0.133 | Train Loss: 3.7757 | Val Loss: 3.3805
[Run 5] Epoch 3/40 | Train Acc: 0.090 | Val Acc: 0.133 | Train Loss: 3.7757 | Val Loss: 3.3805
[Run 5] Epoch 4/40 | Train Acc: 0.104 | Val Acc: 0.160 | Train Loss: 3.6490 | Val Loss: 3.2587
[Run 5] Epoch 4/40 | Train Acc: 0.104 | Val Acc: 0.160 | Train Loss: 3.6490 | Val Loss: 3.2587
[Run 5] Epoch 5/40 | Train Acc: 0.119 | Val Acc: 0.193 | Train Loss: 3.5322 | Val Loss: 3.1783
[Run 5] Epoch 5/40 | Train Acc: 0.119 | Val Acc: 0.193 | Train Loss: 3.5322 | Val Loss: 3.1783
[Run 5] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███████
train_loss,█▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▂▂▃▃▃▄▄▄▅▃▅▅▆▆▆▆▆▆▇▆▇▆▆▇▇▇▇▇▇▇▇█▇█████▇
val_f1_macro,▁▂▂▂▃▃▄▄▄▅▃▅▅▆▅▅▆▆▆▇▆▇▆▇▇▇▇▇▇▇▇▇█▇██████
val_f1_weighted,▁▂▂▂▃▃▄▄▄▅▃▅▅▆▅▅▆▆▆▇▆▇▆▇▇▇▇▇▇▇▇▇█▇██████
val_loss,█▇▇▆▆▆▅▅▅▄▅▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁
val_precision_macro,▁▃▂▃▃▄▅▄▅▆▄▆▅▆▆▆▇▆▆▇▆▇▆▇▇▇▇▇▇▇▇▇████████
val_recall_macro,▁▂▂▃▃▃▄▄▄▅▃▅▅▆▆▆▆▆▆▇▆▇▆▆▇▇▇▇▇▇▇▇█▇█████▇

0,1
best_val_acc,0.48667
epoch,40
lr,0.00017
train_acc,0.82
train_loss,1.1504
val_acc,0.45333
val_f1_macro,0.43001
val_f1_weighted,0.43001
val_loss,2.29591
val_precision_macro,0.45356


## Dataset Augmented

In [6]:
import os
import random
import shutil
import numpy as np
from PIL import Image
from tqdm import tqdm
from scipy.ndimage import map_coordinates

# ============================================================
# SPECAUGMENT DATA GENERATOR 
# Aplica Time Warping + Frequency Masking + Time Masking
# Solo al conjunto de TRAIN. Copia VAL y TEST sin alterar.
# ============================================================

BASE_DIR = "data/spectrograms2/base"
AUG_DIR  = "data/spectrograms2/augmented"

# --- Parámetros de SpecAugment (Park et al., 2019) ---
FREQ_MASK_PARAM = 20       # ancho máximo de bandas de frecuencia
TIME_MASK_PARAM = 25       # ancho máximo de regiones de tiempo
NUM_FREQ_MASKS  = 2        # cantidad de máscaras de frecuencia
NUM_TIME_MASKS  = 2        # cantidad de máscaras de tiempo

TIME_WARP_W     = 20       # parámetro de deformación temporal (warping)
RANDOM_SEED     = 42       # reproducibilidad

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Crear carpetas base del dataset aumentado
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(AUG_DIR, split), exist_ok=True)


# ============================================================
#  TRANSFORMACIONES SPECAUGMENT
# ============================================================

def time_warp(spec: np.ndarray, W: int = 20) -> np.ndarray:
    """Aplica deformación temporal (Time Warping) al espectrograma."""
    if W <= 0:
        return spec

    num_rows, num_cols = spec.shape
    if num_cols < 3:
        return spec

    center = num_cols // 2
    warp = np.random.randint(-W, W + 1)
    src_col = center
    dst_col = max(W, min(num_cols - W - 1, center + warp))

    src_cols = np.arange(num_cols)
    dst_cols = np.interp(src_cols, [0, src_col, num_cols - 1],
                         [0, dst_col, num_cols - 1])

    coords = np.meshgrid(np.arange(num_rows), dst_cols, indexing='ij')
    warped = map_coordinates(spec, coords, order=1, mode='reflect')
    return warped


def freq_mask(spec: np.ndarray, F: int = 20, n_masks: int = 2) -> np.ndarray:
    """Aplica Frequency Masking (en eje vertical del espectrograma)."""
    for _ in range(n_masks):
        f = random.randint(0, F)
        if f > 0 and spec.shape[0] >= f:
            f0 = random.randint(0, spec.shape[0] - f)
            spec[f0:f0 + f, :] = 0
    return spec


def time_mask(spec: np.ndarray, T: int = 25, n_masks: int = 2) -> np.ndarray:
    """Aplica Time Masking (en eje horizontal del espectrograma)."""
    for _ in range(n_masks):
        t = random.randint(0, T)
        if t > 0 and spec.shape[1] >= t:
            t0 = random.randint(0, spec.shape[1] - t)
            spec[:, t0:t0 + t] = 0
    return spec


# ============================================================
#  PIPELINE COMPLETO SPECAUGMENT
# ============================================================
def apply_specaugment(image_path: str, save_path: str):
    """
    Aplica SpecAugment completo (Time Warping + Frequency Masking + Time Masking)
    a una imagen de espectrograma en escala de grises.
    """
    try:
        img = Image.open(image_path).convert("L")
        spec = np.array(img, dtype=np.float32)

        # Aplicar transformaciones en orden
        spec = time_warp(spec, W=TIME_WARP_W)
        spec = freq_mask(spec, F=FREQ_MASK_PARAM, n_masks=NUM_FREQ_MASKS)
        spec = time_mask(spec, T=TIME_MASK_PARAM, n_masks=NUM_TIME_MASKS)

        # Guardar imagen resultante
        spec = np.clip(spec, 0, 255).astype(np.uint8)
        Image.fromarray(spec).save(save_path)

    except Exception as e:
        print(f"⚠️ Error procesando {image_path}: {e}")


# ============================================================
#  PROCESAMIENTO DE CONJUNTO TRAIN
# ============================================================
def process_train_augmented():
    """Aplica SpecAugment únicamente al conjunto de entrenamiento."""
    split_name = "train"
    base_split_path = os.path.join(BASE_DIR, split_name)
    aug_split_path  = os.path.join(AUG_DIR, split_name)

    print("\nProcesando TRAIN con SpecAugment:")
    print(f"   • Frequency masks: {NUM_FREQ_MASKS} (máx ancho: {FREQ_MASK_PARAM})")
    print(f"   • Time masks: {NUM_TIME_MASKS} (máx ancho: {TIME_MASK_PARAM})")

    for class_name in os.listdir(base_split_path):
        class_base_path = os.path.join(base_split_path, class_name)
        if not os.path.isdir(class_base_path):
            continue

        class_aug_path = os.path.join(aug_split_path, class_name)
        os.makedirs(class_aug_path, exist_ok=True)

        images = [f for f in os.listdir(class_base_path) if f.endswith(".png")]

        for img_file in tqdm(images, desc=f"train/{class_name}", leave=False):
            src_path = os.path.join(class_base_path, img_file)
            dst_path = os.path.join(class_aug_path, img_file)
            apply_specaugment(src_path, dst_path)


# ============================================================
#  COPIA LIMPIA DE VAL Y TEST
# ============================================================
def copy_val_test_clean():
    """Copia val y test desde base a augmented SIN aplicar augmentación."""
    for split in ["val", "test"]:
        src_dir = os.path.join(BASE_DIR, split)
        dst_dir = os.path.join(AUG_DIR, split)

        print(f"\nCopiando {split.upper()} (sin augmentación)...")

        if os.path.exists(dst_dir):
            shutil.rmtree(dst_dir)

        shutil.copytree(src_dir, dst_dir)

        total_files = sum(len(files) for _, _, files in os.walk(dst_dir))
        print(f"   {total_files} archivos copiados")


# ============================================================
#  EJECUCIÓN PRINCIPAL
# ============================================================
if __name__ == "__main__":
    print("=" * 60)
    print("GENERACIÓN DE DATASET AUMENTADO CON SPECAUGMENT")
    print("=" * 60)

    # 1. Aplicar SpecAugment SOLO a train
    process_train_augmented()

    # 2. Copiar val y test sin modificar
    copy_val_test_clean()

    print("\n" + "=" * 60)
    print("Dataset aumentado generado correctamente")
    print("=" * 60)
    print(f"Ubicación: {AUG_DIR}")
    print(f"   - train:  CON SpecAugment")
    print(f"   - val:    SIN augmentación")
    print(f"   - test:   SIN augmentación")
    print("=" * 60)


GENERACIÓN DE DATASET AUMENTADO CON SPECAUGMENT

Procesando TRAIN con SpecAugment:
   • Frequency masks: 2 (máx ancho: 20)
   • Time masks: 2 (máx ancho: 25)


                                                                        


Copiando VAL (sin augmentación)...
   300 archivos copiados

Copiando TEST (sin augmentación)...
   300 archivos copiados

Dataset aumentado generado correctamente
Ubicación: data/spectrograms2/augmented
   - train:  CON SpecAugment
   - val:    SIN augmentación
   - test:   SIN augmentación


## Entrenamiento Dataset Augmented Modelo A

In [5]:
from sklearn.metrics import precision_recall_fscore_support

# ==============================
# CONFIGURACIÓN
# ==============================
DATA_DIR = "data/spectrograms2/augmented"
IMG_SIZE = (224, 224)
EPOCHS = 40
PATIENCE = 6

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==============================
# TRANSFORMACIONES
# ==============================
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_data = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
val_data   = datasets.ImageFolder(f"{DATA_DIR}/val", transform=transform)

class_names = train_data.classes

# ==============================
# EXPERIMENTOS (Optimizados para dataset aumentado)
# ==============================
experiments = [
    {"optimizer": "Adam", "lr": 0.001, "batch_size": 32, "weight_decay": 1e-4},
    {"optimizer": "Adam", "lr": 0.0005, "batch_size": 32, "weight_decay": 1e-4},
    {"optimizer": "Adam", "lr": 0.001, "batch_size": 64, "weight_decay": 1e-4},
    {"optimizer": "AdamW", "lr": 0.001, "batch_size": 32, "weight_decay": 1e-4},
    {"optimizer": "SGD", "lr": 0.01, "batch_size": 32, "weight_decay": 1e-4},
]

# ==============================
# CICLO DE ENTRENAMIENTO
# ==============================
for i, exp in enumerate(experiments, start=1):
    try:
        wandb.finish()  # Cerrar cualquier run previo
    except:
        pass
        
    wandb.init(
        project="esc50-lenet-augmented_stratified",
        name=f"run_{i}_opt-{exp['optimizer']}_lr-{exp['lr']}_bs-{exp['batch_size']}",
        config=exp
    )
    config = wandb.config

    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
    val_loader   = DataLoader(val_data, batch_size=config.batch_size)

    model = LeNet5(num_classes=len(train_data.classes)).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing para augmented data

    if config.optimizer in ["Adam", "AdamW"]:
        if config.optimizer == "Adam":
            optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
        else:
            optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=config.weight_decay)

    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # Scheduler más suave para augmented

    best_val_acc = 0.0
    patience_counter = 0

    for epoch in range(EPOCHS):
        # === ENTRENAMIENTO ===
        model.train()
        running_loss, correct, total = 0, 0, 0

        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

        train_acc = correct / total
        train_loss = running_loss / len(train_loader)

        # === VALIDACIÓN ===
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        val_y_true, val_y_pred = [], []
        
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()
                
                val_y_true.extend(labels.cpu().tolist())
                val_y_pred.extend(preds.cpu().tolist())

        val_acc = val_correct / val_total
        val_loss /= len(val_loader)
        
        # === MÉTRICAS ADICIONALES ===
        prec_m, rec_m, f1_m, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="macro", zero_division=0
        )
        _, _, f1_w, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="weighted", zero_division=0
        )
        
        scheduler.step()

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1_macro": f1_m,
            "val_f1_weighted": f1_w,
            "val_precision_macro": prec_m,
            "val_recall_macro": rec_m,
            "lr": scheduler.get_last_lr()[0],
            "val_confusion_matrix": wandb.plot.confusion_matrix(
                y_true=val_y_true,
                preds=val_y_pred,
                class_names=class_names
            )
        })

        print(f"[Run {i}] Epoch {epoch+1}/{EPOCHS} | "
              f"Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f} | "
              f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # === EARLY STOPPING ===
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), f"models/lenet5_aug_best_run{i}.pth")
            wandb.run.summary["best_val_acc"] = best_val_acc
        else:
            patience_counter += 1
            if patience_counter > PATIENCE:
                print(f"[Run {i}] Early stopping triggered at epoch {epoch+1}.")
                break

    print(f"✅ [Run {i}] Mejor Val Acc: {best_val_acc:.3f}")
    wandb.finish()


✅ Tamaño calculado para FC: 43264
[Run 1] Epoch 1/40 | Train Acc: 0.038 | Val Acc: 0.063 | Train Loss: 4.3361 | Val Loss: 3.8046
[Run 1] Epoch 2/40 | Train Acc: 0.049 | Val Acc: 0.073 | Train Loss: 4.1031 | Val Loss: 3.7396
[Run 1] Epoch 3/40 | Train Acc: 0.059 | Val Acc: 0.073 | Train Loss: 4.0263 | Val Loss: 3.7029
[Run 1] Epoch 4/40 | Train Acc: 0.063 | Val Acc: 0.100 | Train Loss: 3.9503 | Val Loss: 3.6340
[Run 1] Epoch 5/40 | Train Acc: 0.071 | Val Acc: 0.090 | Train Loss: 3.9216 | Val Loss: 3.6088
[Run 1] Epoch 6/40 | Train Acc: 0.074 | Val Acc: 0.110 | Train Loss: 3.8430 | Val Loss: 3.6301
[Run 1] Epoch 7/40 | Train Acc: 0.081 | Val Acc: 0.127 | Train Loss: 3.8011 | Val Loss: 3.6149
[Run 1] Epoch 8/40 | Train Acc: 0.074 | Val Acc: 0.140 | Train Loss: 3.8095 | Val Loss: 3.6693
[Run 1] Epoch 9/40 | Train Acc: 0.104 | Val Acc: 0.147 | Train Loss: 3.7178 | Val Loss: 3.5368
[Run 1] Epoch 10/40 | Train Acc: 0.094 | Val Acc: 0.117 | Train Loss: 3.6442 | Val Loss: 3.6160
[Run 1] Epoch 1

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
lr,█████████▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁
train_acc,▁▁▂▂▂▂▃▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▆▆▇▇█
train_loss,█▇▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
val_acc,▁▁▁▃▂▃▄▄▄▃▅▃▆▄▆▆▆▅▆▆▆█▇█▇▇▆▇▇▇▇
val_f1_macro,▁▁▂▂▂▂▄▄▄▃▅▄▅▄▅▆▆▅▆▆▆█▇▇▇▇▆▇▇▇▇
val_f1_weighted,▁▁▂▂▂▂▄▄▄▃▅▄▅▄▅▆▆▅▆▆▆█▇▇▇▇▆▇▇▇▇
val_loss,█▇▇▆▆▆▆▆▅▆▄▄▃▄▄▃▃▄▃▃▂▂▂▁▂▂▃▁▁▂▁
val_precision_macro,▁▁▂▃▂▂▄▅▅▄▄▄▅▃▅▆▆▅▆▆▅█▇▇▇▇▅▆▆█▇
val_recall_macro,▁▁▁▃▂▃▄▄▄▃▅▃▆▄▆▆▆▅▆▆▆█▇█▇▇▆▇▇▇▇

0,1
best_val_acc,0.23333
epoch,31
lr,0.00013
train_acc,0.23214
train_loss,3.04795
val_acc,0.20333
val_f1_macro,0.18072
val_f1_weighted,0.18072
val_loss,3.21286
val_precision_macro,0.21507


✅ Tamaño calculado para FC: 43264
[Run 2] Epoch 1/40 | Train Acc: 0.032 | Val Acc: 0.090 | Train Loss: 4.3643 | Val Loss: 3.7036
[Run 2] Epoch 1/40 | Train Acc: 0.032 | Val Acc: 0.090 | Train Loss: 4.3643 | Val Loss: 3.7036
[Run 2] Epoch 2/40 | Train Acc: 0.046 | Val Acc: 0.083 | Train Loss: 4.1346 | Val Loss: 3.7164
[Run 2] Epoch 2/40 | Train Acc: 0.046 | Val Acc: 0.083 | Train Loss: 4.1346 | Val Loss: 3.7164
[Run 2] Epoch 3/40 | Train Acc: 0.063 | Val Acc: 0.097 | Train Loss: 4.0402 | Val Loss: 3.6545
[Run 2] Epoch 3/40 | Train Acc: 0.063 | Val Acc: 0.097 | Train Loss: 4.0402 | Val Loss: 3.6545
[Run 2] Epoch 4/40 | Train Acc: 0.063 | Val Acc: 0.113 | Train Loss: 3.9724 | Val Loss: 3.6718
[Run 2] Epoch 4/40 | Train Acc: 0.063 | Val Acc: 0.113 | Train Loss: 3.9724 | Val Loss: 3.6718
[Run 2] Epoch 5/40 | Train Acc: 0.054 | Val Acc: 0.120 | Train Loss: 3.9768 | Val Loss: 3.5606
[Run 2] Epoch 5/40 | Train Acc: 0.054 | Val Acc: 0.120 | Train Loss: 3.9768 | Val Loss: 3.5606
[Run 2] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
lr,█████████▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
train_acc,▁▁▂▂▂▂▃▃▂▃▃▃▃▄▄▄▄▅▅▅▅▆▅▆▆▆▆▇▆▇▇▇▇███
train_loss,█▇▆▆▆▅▅▆▅▅▅▄▄▄▃▃▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁
val_acc,▁▁▂▃▃▃▃▁▃▂▄▃▅▅▄▄▆▇▇▇▆▇▆▆█▆▇███▆▇█▆▇▇
val_f1_macro,▂▁▂▂▃▄▃▁▃▂▄▃▅▅▄▃▆▇▇▇▆▇▇▅▇▇▇██▇▇▇█▆▇▇
val_f1_weighted,▂▁▂▂▃▄▃▁▃▂▄▃▅▅▄▃▆▇▇▇▆▇▇▅▇▇▇██▇▇▇█▆▇▇
val_loss,██▇▇▆▇▇▇▆▆▅▅▄▄▄▄▃▄▃▂▂▃▂▂▁▂▁▁▁▂▂▁▂▂▂▁
val_precision_macro,▃▁▂▂▃▄▂▁▄▂▄▁▅▃▄▂▅█▇▇▆█▇▄▇▆▇▆▇▇▇▆█▇▆▆
val_recall_macro,▁▁▂▃▃▃▃▁▃▂▄▃▅▅▄▄▆▇▇▇▆▇▆▆█▆▇███▆▇█▆▇▇

0,1
best_val_acc,0.22333
epoch,36
lr,6e-05
train_acc,0.23
train_loss,3.06625
val_acc,0.20333
val_f1_macro,0.15689
val_f1_weighted,0.15689
val_loss,3.24769
val_precision_macro,0.14931


✅ Tamaño calculado para FC: 43264
[Run 3] Epoch 1/40 | Train Acc: 0.029 | Val Acc: 0.057 | Train Loss: 4.3466 | Val Loss: 3.6992
[Run 3] Epoch 1/40 | Train Acc: 0.029 | Val Acc: 0.057 | Train Loss: 4.3466 | Val Loss: 3.6992
[Run 3] Epoch 2/40 | Train Acc: 0.056 | Val Acc: 0.097 | Train Loss: 4.0989 | Val Loss: 3.6666
[Run 3] Epoch 2/40 | Train Acc: 0.056 | Val Acc: 0.097 | Train Loss: 4.0989 | Val Loss: 3.6666
[Run 3] Epoch 3/40 | Train Acc: 0.062 | Val Acc: 0.103 | Train Loss: 3.9684 | Val Loss: 3.6041
[Run 3] Epoch 3/40 | Train Acc: 0.062 | Val Acc: 0.103 | Train Loss: 3.9684 | Val Loss: 3.6041
[Run 3] Epoch 4/40 | Train Acc: 0.074 | Val Acc: 0.080 | Train Loss: 3.9314 | Val Loss: 3.6383
[Run 3] Epoch 4/40 | Train Acc: 0.074 | Val Acc: 0.080 | Train Loss: 3.9314 | Val Loss: 3.6383
[Run 3] Epoch 5/40 | Train Acc: 0.064 | Val Acc: 0.110 | Train Loss: 3.9128 | Val Loss: 3.5940
[Run 3] Epoch 5/40 | Train Acc: 0.064 | Val Acc: 0.110 | Train Loss: 3.9128 | Val Loss: 3.5940
[Run 3] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
lr,█████████▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁
train_acc,▁▂▂▃▂▃▃▃▃▄▄▄▅▅▄▅▅▅▅▆▆▇▆▇▆█▇▇█
train_loss,█▇▆▆▅▅▅▅▅▅▄▄▃▃▃▃▃▃▂▂▂▂▂▁▂▁▁▁▁
val_acc,▁▃▃▂▃▃▄▁▄▃▃▅▆▅▆▅▄▅▅▄▆█▇▇▇█▆▇█
val_f1_macro,▁▂▃▃▃▃▄▂▃▃▄▅▆▅▆▅▅▅▆▄▆▇▇▇▇█▆▇▇
val_f1_weighted,▁▂▃▃▃▃▄▂▃▃▄▅▆▅▆▅▅▅▆▄▆▇▇▇▇█▆▇▇
val_loss,▆▆▅▆▅▅▅█▅▄▄▃▃▃▃▃▃▃▄▅▂▁▂▂▂▁▂▂▁
val_precision_macro,▁▂▂▄▃▄▄▂▄▄▃▅▆▅▆▅▄▄▇▅▆▆▆▇▇█▆▆▆
val_recall_macro,▁▃▃▂▃▃▄▁▄▃▃▅▆▅▆▅▄▅▅▄▆█▇▇▇█▆▇█

0,1
best_val_acc,0.22667
epoch,29
lr,0.00025
train_acc,0.20214
train_loss,3.15493
val_acc,0.21667
val_f1_macro,0.17167
val_f1_weighted,0.17167
val_loss,3.16702
val_precision_macro,0.16638


✅ Tamaño calculado para FC: 43264
[Run 4] Epoch 1/40 | Train Acc: 0.040 | Val Acc: 0.087 | Train Loss: 4.3782 | Val Loss: 3.7742
[Run 4] Epoch 1/40 | Train Acc: 0.040 | Val Acc: 0.087 | Train Loss: 4.3782 | Val Loss: 3.7742
[Run 4] Epoch 2/40 | Train Acc: 0.053 | Val Acc: 0.087 | Train Loss: 4.1743 | Val Loss: 3.6625
[Run 4] Epoch 2/40 | Train Acc: 0.053 | Val Acc: 0.087 | Train Loss: 4.1743 | Val Loss: 3.6625
[Run 4] Epoch 3/40 | Train Acc: 0.048 | Val Acc: 0.087 | Train Loss: 4.0762 | Val Loss: 3.7167
[Run 4] Epoch 3/40 | Train Acc: 0.048 | Val Acc: 0.087 | Train Loss: 4.0762 | Val Loss: 3.7167
[Run 4] Epoch 4/40 | Train Acc: 0.049 | Val Acc: 0.107 | Train Loss: 4.0676 | Val Loss: 3.6522
[Run 4] Epoch 4/40 | Train Acc: 0.049 | Val Acc: 0.107 | Train Loss: 4.0676 | Val Loss: 3.6522
[Run 4] Epoch 5/40 | Train Acc: 0.060 | Val Acc: 0.090 | Train Loss: 3.9760 | Val Loss: 3.7255
[Run 4] Epoch 5/40 | Train Acc: 0.060 | Val Acc: 0.090 | Train Loss: 3.9760 | Val Loss: 3.7255
[Run 4] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▅▆▆▅▅▇▇▇▆▇▆▇███▇
train_loss,█▇▆▆▆▆▅▅▅▅▄▄▄▄▃▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁
val_acc,▃▃▃▃▃▁▃▄▅▄▃▃▅▅▅▄▆▆▆▄▅▆▆▆▇▆▆▇▇▇▇▇▆▇▇█████
val_f1_macro,▂▂▂▃▂▁▃▃▄▄▂▃▅▅▅▄▅▅▆▃▅▆▅▆▇▆▆▇▇▇▇▇▆▇▇███▇▇
val_f1_weighted,▂▂▂▃▂▁▃▃▄▄▂▃▅▅▅▄▅▅▆▃▅▆▅▆▇▆▆▇▇▇▇▇▆▇▇███▇▇
val_loss,▆▅▅▅▅█▄▄▃▃▆▄▃▃▃▃▃▂▂▄▃▂▃▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁
val_precision_macro,▂▂▂▃▂▁▃▃▄▄▃▃▄▅▄▅▆▄▆▃▄▅▆▆▇▆▆▇▆▇▆▇▆▆█▇██▇█
val_recall_macro,▃▃▃▃▃▁▃▄▅▄▃▃▅▅▅▄▆▆▆▄▅▆▆▆▇▆▆▇▇▇▇▇▆▇▇█████

0,1
best_val_acc,0.25333
epoch,40
lr,6e-05
train_acc,0.21286
train_loss,3.00682
val_acc,0.24
val_f1_macro,0.20253
val_f1_weighted,0.20253
val_loss,3.12573
val_precision_macro,0.23907


✅ Tamaño calculado para FC: 43264
[Run 5] Epoch 1/40 | Train Acc: 0.049 | Val Acc: 0.073 | Train Loss: 4.2707 | Val Loss: 3.7770
[Run 5] Epoch 1/40 | Train Acc: 0.049 | Val Acc: 0.073 | Train Loss: 4.2707 | Val Loss: 3.7770
[Run 5] Epoch 2/40 | Train Acc: 0.064 | Val Acc: 0.077 | Train Loss: 4.0033 | Val Loss: 3.8424
[Run 5] Epoch 2/40 | Train Acc: 0.064 | Val Acc: 0.077 | Train Loss: 4.0033 | Val Loss: 3.8424
[Run 5] Epoch 3/40 | Train Acc: 0.071 | Val Acc: 0.103 | Train Loss: 3.8861 | Val Loss: 3.6279
[Run 5] Epoch 3/40 | Train Acc: 0.071 | Val Acc: 0.103 | Train Loss: 3.8861 | Val Loss: 3.6279
[Run 5] Epoch 4/40 | Train Acc: 0.090 | Val Acc: 0.110 | Train Loss: 3.8336 | Val Loss: 3.7146
[Run 5] Epoch 4/40 | Train Acc: 0.090 | Val Acc: 0.110 | Train Loss: 3.8336 | Val Loss: 3.7146
[Run 5] Epoch 5/40 | Train Acc: 0.111 | Val Acc: 0.130 | Train Loss: 3.6226 | Val Loss: 3.6607
[Run 5] Epoch 5/40 | Train Acc: 0.111 | Val Acc: 0.130 | Train Loss: 3.6226 | Val Loss: 3.6607
[Run 5] Epoch 6/

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█▇█▇██
train_loss,█▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▁▂▂▃▂▃▃▂▄▃▃▅▅▄▅▅▅▆▆▆▅▇▇▅▇▇█▇█▇▇▇███████
val_f1_macro,▁▂▂▂▃▂▃▃▃▄▃▃▄▅▄▅▅▅▇▆▇▆▇▇▅▇▇█▇█▇▇████████
val_f1_weighted,▁▂▂▂▃▂▃▃▃▄▃▃▄▅▄▅▅▅▇▆▇▆▇▇▅▇▇█▇█▇▇████████
val_loss,▇█▅▆▆▆▄▆▅▃▄▅▃▄▅▂▃▄▂▂▂▁▂▂▁▂▂▁▂▂▁▁▂▂▂▁▂▂▂▁
val_precision_macro,▁▂▂▂▂▂▃▃▃▃▄▂▄▅▅▅▅▅▇▆▆▆▇▇▅▇▇▇█▇▇▇█▇▇▇█▇▇█
val_recall_macro,▁▁▂▂▃▂▃▃▂▄▃▃▅▅▄▅▅▅▆▆▆▅▇▇▅▇▇█▇█▇▇▇███████

0,1
best_val_acc,0.27667
epoch,40
lr,0.00063
train_acc,0.66357
train_loss,1.84448
val_acc,0.27333
val_f1_macro,0.24887
val_f1_weighted,0.24887
val_loss,3.26343
val_precision_macro,0.31134


# Modelo B

In [1]:
from typing import Callable, Optional, Type, List
import torch
import torch.nn as nn


# -------------------------
# Utilidades de convolución
# -------------------------
def conv3x3(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Conv 3×3 con padding=1, sin bias (BN lo compensa)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Conv 1×1 para proyección en atajos (ajustar canales/stride)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


# -------------------------
# Bloque residual "básico"
# -------------------------
class BasicBlock(nn.Module):
    """
    Estructura:
        Conv3x3 → BN → ReLU → Conv3x3 → BN → (Suma con atajo) → ReLU
    Donde el atajo (identity) puede incluir una proyección 1×1 si cambia
    la resolución (stride > 1) o el número de canales.
    """
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1   = norm_layer(planes)
        self.relu  = nn.ReLU(inplace=True)

        self.conv2 = conv3x3(planes, planes)
        self.bn2   = norm_layer(planes)

        self.downsample = downsample  # Proyección para el atajo, si aplica

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        identity = x  # Atajo

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Alinear dimensiones del atajo si cambió stride o # de canales
        if self.downsample is not None:
            identity = self.downsample(x)

        out = out + identity
        out = self.relu(out)
        return out


# -----------
# ResNet base
# -----------
class ResNet(nn.Module):
    """
    Constructor general de ResNet con BasicBlock y configuración [2,2,2,2].
    Parámetros clave:
        - small_input=True: conv1=3×3 s=1 y sin MaxPool (mejor para 64–224 px).
        - small_input=False: conv1=7×7 s=2 + MaxPool (clásico de ResNet).
    """
    def __init__(
        self,
        block: Type[BasicBlock],
        layers: List[int],
        num_classes: int = 50,
        in_channels: int = 1,
        small_input: bool = True,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64

        # Capa inicial: variante "small_input" recomendada para espectrogramas
        if small_input:
            # Preserva más detalle inicial (sin MaxPool temprano)
            self.conv1   = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
            self.maxpool = nn.Identity()
        else:
            # Estilo ResNet clásico para entradas grandes
            self.conv1   = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.bn1  = norm_layer(64)
        self.relu = nn.ReLU(inplace=True)

        # Stages: [64, 128, 256, 512] con [2, 2, 2, 2] bloques
        self.layer1 = self._make_layer(block,  64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Cabeza de clasificación
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # Global Average Pooling
        self.fc      = nn.Linear(512 * block.expansion, num_classes)

        # Inicialización recomendada para ReLU/BN
        self._init_weights()

    def _make_layer(self, block: Type[BasicBlock], planes: int, blocks: int, stride: int = 1) -> nn.Sequential:
        """
        Crea un stage con 'blocks' bloques. El primer bloque puede hacer downsample
        (stride=2) para reducir resolución y duplicar canales.
        """
        norm_layer = self._norm_layer
        downsample = None

        # Si cambia resolución o # de canales, proyectamos el atajo (1×1)
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _init_weights(self) -> None:
        """Inicialización Kaiming para conv; constantes para BN; normal para FC."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0.0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Entrada → conv1 → BN → ReLU → (posible MaxPool/Identity)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Stages residuales
        x = self.layer1(x)  # 64
        x = self.layer2(x)  # 128
        x = self.layer3(x)  # 256
        x = self.layer4(x)  # 512

        # Cabeza
        x = self.avgpool(x)           # (B, 512, 1, 1)
        x = torch.flatten(x, 1)       # (B, 512)
        x = self.fc(x)                # (B, num_classes)
        return x


# -------------------------
# Fábrica de ResNet-18
# -------------------------
def resnet18_audio(num_classes: int = 50, in_channels: int = 1, small_input: bool = True) -> ResNet:
    """
    Retorna una ResNet-18 lista para espectrogramas:
        - num_classes: # de clases del dataset (ESC-50 → 50)
        - in_channels: 1 para grises; 3 si usas RGB (replicar canal)
        - small_input: True recomendado para ~128–224 px
    """
    return ResNet(
        block=BasicBlock,
        layers=[2, 2, 2, 2],
        num_classes=num_classes,
        in_channels=in_channels,
        small_input=small_input,
    )

## Entrenamiento Dataset Raw Modelo B

In [3]:
# ===========================================
# ENTRENAMIENTO - MODELO B (ResNet-18 Audio)
# Dataset: data/spectrograms1/base (RAW, sin augment)
# Imagen: 228x228
# GPU: <= 4 GB compatible
# ===========================================

import os, random, gc
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from torch.amp import autocast, GradScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix

# -----------------------------
# 0) Configuración y utilidades
# -----------------------------
os.makedirs("models", exist_ok=True)

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -----------------------------
# 1) Data (RAW)
# -----------------------------
DATA_DIR = "data/spectrograms2/base"
IMG_SIZE = (228, 228)

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # [-1,1]
])

train_data = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
val_data   = datasets.ImageFolder(f"{DATA_DIR}/val",   transform=transform)

num_classes = len(train_data.classes)
class_names = train_data.classes
print("Clases detectadas:", num_classes)

# -----------------------------
# 2) Experimentos
# -----------------------------
experiments = [
    {"optimizer": "AdamW", "lr": 3e-4,  "batch_size": 8, "weight_decay": 1e-4},
    {"optimizer": "AdamW", "lr": 1e-4,  "batch_size": 8, "weight_decay": 1e-4},
    {"optimizer": "SGD",   "lr": 0.01,  "batch_size": 8, "weight_decay": 1e-4},
    {"optimizer": "SGD",   "lr": 0.005, "batch_size": 8, "weight_decay": 1e-4},
    {"optimizer": "AdamW", "lr": 5e-4,  "batch_size": 12, "weight_decay": 1e-4},
]

EPOCHS   = 40
PATIENCE = 6

# -----------------------------
# 3) Loop multi-run
# -----------------------------
for i, exp in enumerate(experiments, start=1):
    print(f"\n===== Iniciando experimento {i} =====")

    wandb.init(
        project="esc50-modelB_stratified",
        name=f"resnet18B_run_{i}_opt-{exp['optimizer']}_lr-{exp['lr']}_bs-{exp['batch_size']}",
        config=exp,
        mode="offline",
    )
    config = wandb.config

    train_loader = DataLoader(
        train_data,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=True
    )
    val_loader = DataLoader(
        val_data,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=True
    )

    # -------------------------------
    # Modelo (ResNet-18 personalizada)
    # -------------------------------
    model = resnet18_audio(num_classes=num_classes, in_channels=1, small_input=True).to(device)

    criterion = nn.CrossEntropyLoss()
    if config.optimizer == "AdamW":
        optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=config.weight_decay)

    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)
    scaler = GradScaler('cuda' if torch.cuda.is_available() else 'cpu')

    best_val_acc = 0.0
    patience_counter = 0

    # -------------------------------
    # Entrenamiento por épocas
    # -------------------------------
    for epoch in range(EPOCHS):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for imgs, labels in train_loader:
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)

            with autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
                outputs = model(imgs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item()
            total += labels.size(0)
            correct += (preds == labels).sum().item()

            # Limpieza batch
            del imgs, labels, outputs, loss, preds
            torch.cuda.empty_cache()

        train_acc = correct / total
        train_loss = running_loss / max(1, len(train_loader))

        # --------- Validación ---------
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_y_true, val_y_pred = [], []

        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs = imgs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                with autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)

                _, preds = torch.max(outputs, 1)
                val_loss += loss.item()
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()

                val_y_true.extend(labels.cpu().tolist())
                val_y_pred.extend(preds.cpu().tolist())

                del imgs, labels, outputs, preds, loss
                torch.cuda.empty_cache()

        val_acc = val_correct / val_total
        val_loss = val_loss / max(1, len(val_loader))

        # --- Métricas adicionales
        prec_m, rec_m, f1_m, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="macro", zero_division=0
        )
        prec_w, rec_w, f1_w, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="weighted", zero_division=0
        )

        scheduler.step()
        gc.collect()

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1_macro": f1_m,
            "val_f1_weighted": f1_w,
            "val_precision_macro": prec_m,
            "val_recall_macro": rec_m,
            "lr": scheduler.get_last_lr()[0],
            "val_confusion_matrix": wandb.plot.confusion_matrix(
                y_true=val_y_true,
                preds=val_y_pred,
                class_names=class_names
            )
        })

        print(f"[Run {i}] Ep {epoch+1:02d}/{EPOCHS} | "
              f"Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f} | "
              f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # --- Early stopping y guardado
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), f"models/MODEL_B_resnet18_audio_best_run{i}.pth")
            wandb.run.summary["best_val_acc"] = best_val_acc
        else:
            patience_counter += 1
            if patience_counter > PATIENCE:
                print(f"[Run {i}] Early stopping en epoch {epoch+1}.")
                break

        torch.cuda.empty_cache()
        gc.collect()

    print(f"✅ [Run {i}] Mejor Val Acc: {best_val_acc:.3f}")

    wandb.finish()
    del model, optimizer, scheduler, scaler
    torch.cuda.empty_cache()
    gc.collect()


Device: cuda
Clases detectadas: 50

===== Iniciando experimento 1 =====


[Run 1] Ep 01/40 | Train Acc: 0.063 | Val Acc: 0.107 | Train Loss: 3.5940 | Val Loss: 3.2807
[Run 1] Ep 02/40 | Train Acc: 0.114 | Val Acc: 0.233 | Train Loss: 3.1759 | Val Loss: 2.8340
[Run 1] Ep 02/40 | Train Acc: 0.114 | Val Acc: 0.233 | Train Loss: 3.1759 | Val Loss: 2.8340
[Run 1] Ep 03/40 | Train Acc: 0.194 | Val Acc: 0.173 | Train Loss: 2.9165 | Val Loss: 2.8972
[Run 1] Ep 03/40 | Train Acc: 0.194 | Val Acc: 0.173 | Train Loss: 2.9165 | Val Loss: 2.8972
[Run 1] Ep 04/40 | Train Acc: 0.225 | Val Acc: 0.263 | Train Loss: 2.7275 | Val Loss: 2.6050
[Run 1] Ep 04/40 | Train Acc: 0.225 | Val Acc: 0.263 | Train Loss: 2.7275 | Val Loss: 2.6050
[Run 1] Ep 05/40 | Train Acc: 0.276 | Val Acc: 0.240 | Train Loss: 2.5378 | Val Loss: 2.6645
[Run 1] Ep 05/40 | Train Acc: 0.276 | Val Acc: 0.240 | Train Loss: 2.5378 | Val Loss: 2.6645
[Run 1] Ep 06/40 | Train Acc: 0.311 | Val Acc: 0.330 | Train Loss: 2.4449 | Val Loss: 2.2783
[Run 1] Ep 06/40 | Train Acc: 0.311 | Val Acc: 0.330 | Train Loss: 2.4

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇████████
train_loss,█▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▃▂▃▄▅▅▅▅▅▅▅▆▆▅▆▇▇▆▇▇▆▇▇▇▇▇▇▇▇███▇████
val_f1_macro,▁▂▁▂▂▃▃▄▅▅▅▅▅▅▆▆▆▆▇▇▆▇▇▆▇▇▇▇▇▇▇▇███▇████
val_f1_weighted,▁▂▁▂▂▃▃▄▅▅▅▅▅▅▆▆▆▆▇▇▆▇▇▆▇▇▇▇▇▇▇▇███▇████
val_loss,█▇▇▆▆▅▅▄▄▃▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁
val_precision_macro,▁▂▁▃▂▄▄▅▅▅▅▅▅▅▇▇▆▆▇▇▇▇▇▆▇▇▇███████████▇█
val_recall_macro,▁▂▂▃▂▃▄▅▅▅▅▅▅▅▆▆▅▆▇▇▆▇▇▆▇▇▇▇▇▇▇▇███▇████

0,1
best_val_acc,0.76
epoch,40
lr,5e-05
train_acc,0.89786
train_loss,0.45052
val_acc,0.73667
val_f1_macro,0.73001
val_f1_weighted,0.73001
val_loss,0.91348
val_precision_macro,0.76383



===== Iniciando experimento 2 =====


[Run 2] Ep 01/40 | Train Acc: 0.084 | Val Acc: 0.160 | Train Loss: 3.5569 | Val Loss: 3.0882
[Run 2] Ep 02/40 | Train Acc: 0.175 | Val Acc: 0.203 | Train Loss: 3.0771 | Val Loss: 2.8628
[Run 2] Ep 02/40 | Train Acc: 0.175 | Val Acc: 0.203 | Train Loss: 3.0771 | Val Loss: 2.8628
[Run 2] Ep 03/40 | Train Acc: 0.221 | Val Acc: 0.250 | Train Loss: 2.8034 | Val Loss: 2.6208
[Run 2] Ep 03/40 | Train Acc: 0.221 | Val Acc: 0.250 | Train Loss: 2.8034 | Val Loss: 2.6208
[Run 2] Ep 04/40 | Train Acc: 0.284 | Val Acc: 0.290 | Train Loss: 2.6132 | Val Loss: 2.5024
[Run 2] Ep 04/40 | Train Acc: 0.284 | Val Acc: 0.290 | Train Loss: 2.6132 | Val Loss: 2.5024
[Run 2] Ep 05/40 | Train Acc: 0.316 | Val Acc: 0.383 | Train Loss: 2.4464 | Val Loss: 2.2706
[Run 2] Ep 05/40 | Train Acc: 0.316 | Val Acc: 0.383 | Train Loss: 2.4464 | Val Loss: 2.2706
[Run 2] Ep 06/40 | Train Acc: 0.373 | Val Acc: 0.320 | Train Loss: 2.2540 | Val Loss: 2.4468
[Run 2] Ep 06/40 | Train Acc: 0.373 | Val Acc: 0.320 | Train Loss: 2.2

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁
train_acc,▁▂▂▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██████
train_loss,█▇▆▆▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_acc,▁▂▂▃▄▃▅▅▄▅▆▆▆▆▆▆▇▆▇▇▇▇▇███▇████
val_f1_macro,▁▁▂▂▄▃▄▅▄▅▆▆▆▆▆▆▇▆▇▇▇▇▇███▇████
val_f1_weighted,▁▁▂▂▄▃▄▅▄▅▆▆▆▆▆▆▇▆▇▇▇▇▇███▇████
val_loss,█▇▆▆▅▆▄▄▄▃▃▃▃▃▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_precision_macro,▁▂▂▃▄▄▅▅▄▅▆▆▆▇▆▆▇▇▇▇▇▇▇███▇████
val_recall_macro,▁▂▂▃▄▃▅▅▄▅▆▆▆▆▆▆▇▆▇▇▇▇▇███▇████

0,1
best_val_acc,0.74333
epoch,31
lr,3e-05
train_acc,0.89
train_loss,0.60365
val_acc,0.74
val_f1_macro,0.73624
val_f1_weighted,0.73624
val_loss,1.00237
val_precision_macro,0.77265



===== Iniciando experimento 3 =====


[Run 3] Ep 01/40 | Train Acc: 0.049 | Val Acc: 0.067 | Train Loss: 3.9160 | Val Loss: 3.5599
[Run 3] Ep 02/40 | Train Acc: 0.096 | Val Acc: 0.100 | Train Loss: 3.4126 | Val Loss: 3.2587
[Run 3] Ep 02/40 | Train Acc: 0.096 | Val Acc: 0.100 | Train Loss: 3.4126 | Val Loss: 3.2587
[Run 3] Ep 03/40 | Train Acc: 0.129 | Val Acc: 0.133 | Train Loss: 3.1493 | Val Loss: 3.2062
[Run 3] Ep 03/40 | Train Acc: 0.129 | Val Acc: 0.133 | Train Loss: 3.1493 | Val Loss: 3.2062
[Run 3] Ep 04/40 | Train Acc: 0.193 | Val Acc: 0.240 | Train Loss: 2.8510 | Val Loss: 2.6457
[Run 3] Ep 04/40 | Train Acc: 0.193 | Val Acc: 0.240 | Train Loss: 2.8510 | Val Loss: 2.6457
[Run 3] Ep 05/40 | Train Acc: 0.243 | Val Acc: 0.200 | Train Loss: 2.5899 | Val Loss: 3.1046
[Run 3] Ep 05/40 | Train Acc: 0.243 | Val Acc: 0.200 | Train Loss: 2.5899 | Val Loss: 3.1046
[Run 3] Ep 06/40 | Train Acc: 0.314 | Val Acc: 0.307 | Train Loss: 2.3783 | Val Loss: 2.7760
[Run 3] Ep 06/40 | Train Acc: 0.314 | Val Acc: 0.307 | Train Loss: 2.3

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁
train_acc,▁▁▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇█▇██████
train_loss,█▇▇▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▁▂▃▂▄▅▄▅▅▆▆▆▅▆▆▇▇▇▇▇▆▇▇██▇▇█▇███
val_f1_macro,▁▁▂▃▂▄▅▃▅▅▆▆▆▅▆▆▇▇▇▇▇▆▇▇██▇▇█▇███
val_f1_weighted,▁▁▂▃▂▄▅▃▅▅▆▆▆▅▆▆▇▇▇▇▇▆▇▇██▇▇█▇███
val_loss,█▇▇▅▇▆▅▅▄▃▃▃▃▃▂▂▂▂▂▂▂▃▂▂▁▁▂▂▁▂▁▁▁
val_precision_macro,▁▁▂▃▂▄▅▄▆▆▆▆▆▆▇▇▇▇▇█▇▇█████▇█████
val_recall_macro,▁▁▂▃▂▄▅▄▅▅▆▆▆▅▆▆▇▇▇▇▇▆▇▇██▇▇█▇███

0,1
best_val_acc,0.70667
epoch,33
lr,0.0024
train_acc,0.95
train_loss,0.22666
val_acc,0.69667
val_f1_macro,0.69614
val_f1_weighted,0.69614
val_loss,1.08284
val_precision_macro,0.74075



===== Iniciando experimento 4 =====


[Run 4] Ep 01/40 | Train Acc: 0.049 | Val Acc: 0.057 | Train Loss: 3.8718 | Val Loss: 3.7734
[Run 4] Ep 02/40 | Train Acc: 0.110 | Val Acc: 0.137 | Train Loss: 3.2791 | Val Loss: 3.0903
[Run 4] Ep 02/40 | Train Acc: 0.110 | Val Acc: 0.137 | Train Loss: 3.2791 | Val Loss: 3.0903
[Run 4] Ep 03/40 | Train Acc: 0.174 | Val Acc: 0.223 | Train Loss: 2.9337 | Val Loss: 2.8399
[Run 4] Ep 03/40 | Train Acc: 0.174 | Val Acc: 0.223 | Train Loss: 2.9337 | Val Loss: 2.8399
[Run 4] Ep 04/40 | Train Acc: 0.199 | Val Acc: 0.197 | Train Loss: 2.7773 | Val Loss: 3.1860
[Run 4] Ep 04/40 | Train Acc: 0.199 | Val Acc: 0.197 | Train Loss: 2.7773 | Val Loss: 3.1860
[Run 4] Ep 05/40 | Train Acc: 0.278 | Val Acc: 0.277 | Train Loss: 2.4990 | Val Loss: 2.8326
[Run 4] Ep 05/40 | Train Acc: 0.278 | Val Acc: 0.277 | Train Loss: 2.4990 | Val Loss: 2.8326
[Run 4] Ep 06/40 | Train Acc: 0.293 | Val Acc: 0.373 | Train Loss: 2.3815 | Val Loss: 2.2385
[Run 4] Ep 06/40 | Train Acc: 0.293 | Val Acc: 0.373 | Train Loss: 2.3

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██████████
train_loss,█▇▆▆▅▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▂▃▂▃▄▄▄▅▅▅▅▄▆▅▆▆▆▆▆▇▆▆▆▇▇▇▇▇▇▇▇████▇█▇▇
val_f1_macro,▁▂▂▂▃▄▄▄▅▄▅▅▄▆▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇████▇█▇▇
val_f1_weighted,▁▂▂▂▃▄▄▄▅▄▅▅▄▆▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇████▇█▇▇
val_loss,█▆▆▇▆▄▅▄▄▃▃▃▄▂▃▂▂▂▂▂▂▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂▁
val_precision_macro,▁▂▂▂▄▄▄▄▅▅▆▅▆▆▆▆▆▇▇▇▇▆▆▇▇▇█▇█▇█▇████▇██▇
val_recall_macro,▁▂▃▂▃▄▄▄▅▅▅▅▄▆▅▆▆▆▆▆▇▆▆▆▇▇▇▇▇▇▇▇████▇█▇▇

0,1
best_val_acc,0.76
epoch,40
lr,0.00084
train_acc,0.96786
train_loss,0.20368
val_acc,0.69667
val_f1_macro,0.69227
val_f1_weighted,0.69227
val_loss,1.07535
val_precision_macro,0.7246



===== Iniciando experimento 5 =====


[Run 5] Ep 01/40 | Train Acc: 0.085 | Val Acc: 0.143 | Train Loss: 3.4951 | Val Loss: 3.1591
[Run 5] Ep 02/40 | Train Acc: 0.156 | Val Acc: 0.200 | Train Loss: 2.9497 | Val Loss: 2.7942
[Run 5] Ep 02/40 | Train Acc: 0.156 | Val Acc: 0.200 | Train Loss: 2.9497 | Val Loss: 2.7942
[Run 5] Ep 03/40 | Train Acc: 0.208 | Val Acc: 0.247 | Train Loss: 2.7928 | Val Loss: 2.7179
[Run 5] Ep 03/40 | Train Acc: 0.208 | Val Acc: 0.247 | Train Loss: 2.7928 | Val Loss: 2.7179
[Run 5] Ep 04/40 | Train Acc: 0.250 | Val Acc: 0.283 | Train Loss: 2.5605 | Val Loss: 2.4131
[Run 5] Ep 04/40 | Train Acc: 0.250 | Val Acc: 0.283 | Train Loss: 2.5605 | Val Loss: 2.4131
[Run 5] Ep 05/40 | Train Acc: 0.283 | Val Acc: 0.320 | Train Loss: 2.4381 | Val Loss: 2.4872
[Run 5] Ep 05/40 | Train Acc: 0.283 | Val Acc: 0.320 | Train Loss: 2.4381 | Val Loss: 2.4872
[Run 5] Ep 06/40 | Train Acc: 0.334 | Val Acc: 0.250 | Train Loss: 2.2161 | Val Loss: 2.7545
[Run 5] Ep 06/40 | Train Acc: 0.334 | Val Acc: 0.250 | Train Loss: 2.2

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
train_acc,▁▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████
train_loss,█▇▆▆▆▅▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▃▃▂▃▅▄▆▅▅▆▆▅▆▄▇▆▇▇▇▇▇██▇▇███▇██████
val_f1_macro,▁▁▂▃▃▂▃▅▄▆▅▅▆▆▅▆▄▇▆▇▇▇▇▇██▇▇███▇██████
val_f1_weighted,▁▁▂▃▃▂▃▅▄▆▅▅▆▆▅▆▄▇▆▇▇▇▇▇██▇▇███▇██████
val_loss,█▇▇▆▆▇▅▄▄▃▄▃▃▃▄▃█▂▃▂▂▂▂▂▁▁▂▂▂▁▁▂▁▁▁▂▁▁
val_precision_macro,▁▁▃▃▃▂▄▅▅▆▆▆▆▆▅▆▅▇▇▇▇▇▇▇██▇▇██████████
val_recall_macro,▁▂▂▃▃▂▃▅▄▆▅▅▆▆▅▆▄▇▆▇▇▇▇▇██▇▇███▇██████

0,1
best_val_acc,0.71333
epoch,38
lr,0.00012
train_acc,0.93143
train_loss,0.31975
val_acc,0.69667
val_f1_macro,0.69116
val_f1_weighted,0.69116
val_loss,1.08429
val_precision_macro,0.72348


## Entrenamiento Dataset Augmented Modelo B

In [2]:
# ===========================================
# ENTRENAMIENTO - MODELO B (ResNet-18 Audio)
# Dataset: data/spectrograms1/augmented
# Optimizado para GPUs pequeñas (≤ 4 GB)
# ===========================================

import os, random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from torch.amp import autocast, GradScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix

# ---- 0) Setup
os.makedirs("models", exist_ok=True)
def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# ---- 1) Data
DATA_DIR = "data/spectrograms1/augmented"
IMG_SIZE = (224, 224)   

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_data = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
val_data   = datasets.ImageFolder(f"{DATA_DIR}/val",   transform=transform)

num_classes = len(train_data.classes)
class_names = train_data.classes
print("Clases:", num_classes)

# ---- 2) Experimentos
experiments = [
    {"optimizer": "Adam", "lr": 0.001,  "batch_size": 8,  "weight_decay": 1e-4},
    {"optimizer": "Adam", "lr": 0.0005, "batch_size": 8,  "weight_decay": 1e-4},
    {"optimizer": "Adam", "lr": 0.001,  "batch_size": 16, "weight_decay": 1e-4},
    {"optimizer": "SGD",  "lr": 0.01,   "batch_size": 8,  "weight_decay": 1e-4},
    {"optimizer": "SGD",  "lr": 0.001,  "batch_size": 8,  "weight_decay": 1e-4},
]

EPOCHS   = 40
PATIENCE = 6

# ---- 3) Loop multi-run
for i, exp in enumerate(experiments, start=1):
    # Cerrar cualquier run previo de W&B para evitar duplicados
    if wandb.run is not None:
        wandb.finish()
    
    wandb.init(
        project="esc50-resnet18-augmented",  # ← Nombre único del proyecto
        name=f"run_{i}_opt-{exp['optimizer']}_lr-{exp['lr']}_bs-{exp['batch_size']}",
        config=exp,
        reinit=True  # Permite reinicialización si hay conflictos
    )
    config = wandb.config

    train_loader = DataLoader(
        train_data, batch_size=config.batch_size, shuffle=True, num_workers=2, pin_memory=True
    )
    val_loader   = DataLoader(
        val_data,   batch_size=config.batch_size, shuffle=False, num_workers=2, pin_memory=True
    )

    # --- Modelo B (tu ResNet-18)
    model = resnet18_audio(num_classes=num_classes, in_channels=1, small_input=True).to(device)

    # --- Criterio / Optimizador / Scheduler
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=config.weight_decay)

    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)
    scaler = GradScaler('cuda' if torch.cuda.is_available() else 'cpu')

    best_val_acc = 0.0
    patience_counter = 0

    # ---- Entrenamiento
    for epoch in range(EPOCHS):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
                outputs = model(imgs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

        train_acc = correct / total
        train_loss = running_loss / max(1, len(train_loader))

        # ---- Validación
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        val_y_true, val_y_pred = [], []
        
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                with autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()
                
                val_y_true.extend(labels.cpu().tolist())
                val_y_pred.extend(preds.cpu().tolist())

        val_acc = val_correct / val_total
        val_loss = val_loss / max(1, len(val_loader))

        # --- Métricas adicionales
        prec_m, rec_m, f1_m, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="macro", zero_division=0
        )
        prec_w, rec_w, f1_w, _ = precision_recall_fscore_support(
            val_y_true, val_y_pred, average="weighted", zero_division=0
        )

        scheduler.step()
        torch.cuda.empty_cache()  # Liberar VRAM

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
            "val_f1_macro": f1_m,
            "val_f1_weighted": f1_w,
            "val_precision_macro": prec_m,
            "val_recall_macro": rec_m,
            "lr": scheduler.get_last_lr()[0],
            "val_confusion_matrix": wandb.plot.confusion_matrix(
                y_true=val_y_true,
                preds=val_y_pred,
                class_names=class_names
            )
        })

        print(f"[Run {i}] Epoch {epoch+1:02d}/{EPOCHS} | "
              f"Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f} | "
              f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # ---- Early stopping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), f"models/resnet18_audio_AUG_best_run{i}.pth")
            wandb.run.summary["best_val_acc"] = best_val_acc
        else:
            patience_counter += 1
            if patience_counter > PATIENCE:
                print(f"[Run {i}] Early stopping triggered at epoch {epoch+1}.")
                break

    print(f"✅ [Run {i}] Mejor Val Acc: {best_val_acc:.3f}")
    wandb.finish()




Device: cuda
Clases: 50


[34m[1mwandb[0m: Currently logged in as: [33mjavialroro[0m ([33mjavialroro-tecnologico-de-costa-rica[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[Run 1] Epoch 01/40 | Train Acc: 0.028 | Val Acc: 0.062 | Train Loss: 3.9371 | Val Loss: 3.7019
[Run 1] Epoch 02/40 | Train Acc: 0.052 | Val Acc: 0.065 | Train Loss: 3.6587 | Val Loss: 3.5678
[Run 1] Epoch 02/40 | Train Acc: 0.052 | Val Acc: 0.065 | Train Loss: 3.6587 | Val Loss: 3.5678
[Run 1] Epoch 03/40 | Train Acc: 0.059 | Val Acc: 0.095 | Train Loss: 3.5390 | Val Loss: 3.6986
[Run 1] Epoch 03/40 | Train Acc: 0.059 | Val Acc: 0.095 | Train Loss: 3.5390 | Val Loss: 3.6986
[Run 1] Epoch 04/40 | Train Acc: 0.114 | Val Acc: 0.125 | Train Loss: 3.2836 | Val Loss: 3.1744
[Run 1] Epoch 04/40 | Train Acc: 0.114 | Val Acc: 0.125 | Train Loss: 3.2836 | Val Loss: 3.1744
[Run 1] Epoch 05/40 | Train Acc: 0.128 | Val Acc: 0.188 | Train Loss: 3.1629 | Val Loss: 2.9689
[Run 1] Epoch 05/40 | Train Acc: 0.128 | Val Acc: 0.188 | Train Loss: 3.1629 | Val Loss: 2.9689
[Run 1] Epoch 06/40 | Train Acc: 0.149 | Val Acc: 0.087 | Train Loss: 2.9977 | Val Loss: 4.6198
[Run 1] Epoch 06/40 | Train Acc: 0.149 |

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██████
train_loss,█▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_acc,▁▁▁▂▃▁▃▃▄▄▄▄▄▅▃▅▃▅▆▅▆▆▆▆▇▇▇▇▇▇▇▆▇██▇█▇▇█
val_f1_macro,▁▁▁▂▂▁▃▃▄▄▄▄▄▅▃▅▃▅▆▅▆▆▆▆▇▇▇▇▇▇▇▆▇██▇█▇▇▇
val_f1_weighted,▁▁▁▂▂▁▃▃▄▄▄▄▄▅▃▅▃▅▆▅▆▆▆▆▇▇▇▇▇▇▇▆▇██▇█▇▇▇
val_loss,▆▆▆▅▄█▄▄▄▃▃▃▃▃▆▄▄▃▂▃▂▂▂▂▂▂▁▁▁▂▂▂▁▁▁▂▁▁▁▁
val_precision_macro,▁▁▁▂▂▂▃▂▄▄▅▄▄▅▄▅▄▆▆▆▆▆▆▇▇▇▇▇██▇▇▇▇█▇█▇██
val_recall_macro,▁▁▁▂▃▁▃▃▄▄▄▄▄▅▃▅▃▅▆▅▆▆▆▆▇▇▇▇▇▇▇▆▇██▇█▇▇█

0,1
best_val_acc,0.6125
epoch,40
lr,0.00017
train_acc,0.89083
train_loss,0.46251
val_acc,0.5875
val_f1_macro,0.55876
val_f1_weighted,0.55876
val_loss,1.45288
val_precision_macro,0.61456


[Run 2] Epoch 01/40 | Train Acc: 0.057 | Val Acc: 0.060 | Train Loss: 3.7364 | Val Loss: 3.7073
[Run 2] Epoch 02/40 | Train Acc: 0.084 | Val Acc: 0.090 | Train Loss: 3.3562 | Val Loss: 3.4327
[Run 2] Epoch 02/40 | Train Acc: 0.084 | Val Acc: 0.090 | Train Loss: 3.3562 | Val Loss: 3.4327
[Run 2] Epoch 03/40 | Train Acc: 0.111 | Val Acc: 0.152 | Train Loss: 3.2064 | Val Loss: 3.1589
[Run 2] Epoch 03/40 | Train Acc: 0.111 | Val Acc: 0.152 | Train Loss: 3.2064 | Val Loss: 3.1589
[Run 2] Epoch 04/40 | Train Acc: 0.147 | Val Acc: 0.205 | Train Loss: 3.0393 | Val Loss: 2.8981
[Run 2] Epoch 04/40 | Train Acc: 0.147 | Val Acc: 0.205 | Train Loss: 3.0393 | Val Loss: 2.8981
[Run 2] Epoch 05/40 | Train Acc: 0.145 | Val Acc: 0.182 | Train Loss: 2.9506 | Val Loss: 3.0472
[Run 2] Epoch 05/40 | Train Acc: 0.145 | Val Acc: 0.182 | Train Loss: 2.9506 | Val Loss: 3.0472
[Run 2] Epoch 06/40 | Train Acc: 0.227 | Val Acc: 0.302 | Train Loss: 2.7572 | Val Loss: 2.6592
[Run 2] Epoch 06/40 | Train Acc: 0.227 |

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁
train_acc,▁▁▂▂▂▃▃▃▄▄▄▄▅▄▅▅▅▆▆▆▆▆▆▇▇▇▇████
train_loss,█▇▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
val_acc,▁▁▂▃▃▄▄▃▅▆▅▅▅▆▆▆▆▇▆▇▆▇▇██▇███▇█
val_f1_macro,▁▁▂▃▃▄▄▃▅▅▅▅▅▆▅▆▆▇▆▇▆▇▇██▇███▇█
val_f1_weighted,▁▁▂▃▃▄▄▃▅▅▅▅▅▆▅▆▆▇▆▇▆▇▇██▇███▇█
val_loss,█▇▆▅▆▅▄▅▃▃▃▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂▁
val_precision_macro,▁▁▂▃▃▄▄▃▅▅▅▅▅▆▆▆▆▇▆▇▇▇▇▇▇▇█▇█▇█
val_recall_macro,▁▁▂▃▃▄▄▃▅▆▅▅▅▆▆▆▆▇▆▇▆▇▇██▇███▇█

0,1
best_val_acc,0.5725
epoch,31
lr,0.00017
train_acc,0.72917
train_loss,0.92986
val_acc,0.545
val_f1_macro,0.53383
val_f1_weighted,0.53383
val_loss,1.58241
val_precision_macro,0.62073


[Run 3] Epoch 01/40 | Train Acc: 0.037 | Val Acc: 0.040 | Train Loss: 3.8082 | Val Loss: 3.8550
[Run 3] Epoch 02/40 | Train Acc: 0.077 | Val Acc: 0.080 | Train Loss: 3.4639 | Val Loss: 3.2861
[Run 3] Epoch 02/40 | Train Acc: 0.077 | Val Acc: 0.080 | Train Loss: 3.4639 | Val Loss: 3.2861
[Run 3] Epoch 03/40 | Train Acc: 0.111 | Val Acc: 0.090 | Train Loss: 3.2120 | Val Loss: 3.2970
[Run 3] Epoch 03/40 | Train Acc: 0.111 | Val Acc: 0.090 | Train Loss: 3.2120 | Val Loss: 3.2970
[Run 3] Epoch 04/40 | Train Acc: 0.159 | Val Acc: 0.168 | Train Loss: 2.9609 | Val Loss: 2.8582
[Run 3] Epoch 04/40 | Train Acc: 0.159 | Val Acc: 0.168 | Train Loss: 2.9609 | Val Loss: 2.8582
[Run 3] Epoch 05/40 | Train Acc: 0.218 | Val Acc: 0.228 | Train Loss: 2.7222 | Val Loss: 2.7622
[Run 3] Epoch 05/40 | Train Acc: 0.218 | Val Acc: 0.228 | Train Loss: 2.7222 | Val Loss: 2.7622
[Run 3] Epoch 06/40 | Train Acc: 0.255 | Val Acc: 0.320 | Train Loss: 2.5501 | Val Loss: 2.4695
[Run 3] Epoch 06/40 | Train Acc: 0.255 |

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
train_loss,█▇▇▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▁▂▃▃▄▅▄▄▄▄▅▅▅▅▅▇▅▆▇▇▇▆▆▇▇▇█▆██▇█▇█████▇
val_f1_macro,▁▁▁▂▃▄▄▄▃▄▄▅▅▅▅▄▇▅▆▆▇▇▆▅▇▇▇█▆██▇█▇█████▇
val_f1_weighted,▁▁▁▂▃▄▄▄▃▄▄▅▅▅▅▄▇▅▆▆▇▇▆▅▇▇▇█▆██▇█▇█████▇
val_loss,█▆▇▅▅▄▄▄▅▅▅▃▃▄▃▄▂▄▃▂▂▂▃▃▂▁▂▁▃▁▁▂▁▂▂▁▁▁▂▂
val_precision_macro,▁▁▁▂▃▄▅▄▄▄▅▅▅▆▅▆▇▆▆▇▇▇▇▆▇▇▇▇▇██▇▇██▇███▇
val_recall_macro,▁▁▂▃▃▄▅▄▄▄▄▅▅▅▅▅▇▅▆▇▇▇▆▆▇▇▇█▆██▇█▇█████▇

0,1
best_val_acc,0.62
epoch,40
lr,0.00017
train_acc,0.94083
train_loss,0.3014
val_acc,0.5725
val_f1_macro,0.55551
val_f1_weighted,0.55551
val_loss,1.56287
val_precision_macro,0.62847


[Run 4] Epoch 01/40 | Train Acc: 0.022 | Val Acc: 0.030 | Train Loss: 4.0767 | Val Loss: 3.9781
[Run 4] Epoch 02/40 | Train Acc: 0.056 | Val Acc: 0.075 | Train Loss: 3.6328 | Val Loss: 4.0524
[Run 4] Epoch 02/40 | Train Acc: 0.056 | Val Acc: 0.075 | Train Loss: 3.6328 | Val Loss: 4.0524
[Run 4] Epoch 03/40 | Train Acc: 0.112 | Val Acc: 0.083 | Train Loss: 3.3671 | Val Loss: 4.7000
[Run 4] Epoch 03/40 | Train Acc: 0.112 | Val Acc: 0.083 | Train Loss: 3.3671 | Val Loss: 4.7000
[Run 4] Epoch 04/40 | Train Acc: 0.144 | Val Acc: 0.177 | Train Loss: 3.1378 | Val Loss: 2.8800
[Run 4] Epoch 04/40 | Train Acc: 0.144 | Val Acc: 0.177 | Train Loss: 3.1378 | Val Loss: 2.8800
[Run 4] Epoch 05/40 | Train Acc: 0.183 | Val Acc: 0.145 | Train Loss: 2.9245 | Val Loss: 3.5701
[Run 4] Epoch 05/40 | Train Acc: 0.183 | Val Acc: 0.145 | Train Loss: 2.9245 | Val Loss: 3.5701
[Run 4] Epoch 06/40 | Train Acc: 0.228 | Val Acc: 0.250 | Train Loss: 2.7629 | Val Loss: 2.7424
[Run 4] Epoch 06/40 | Train Acc: 0.228 |

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
train_acc,▁▁▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██████
train_loss,█▇▇▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▂▂▃▂▄▄▃▄▅▅▅▅▅▆▅▆▇▇▆▇▆▆▆█▇▇██▇▇▇███
val_f1_macro,▁▁▁▂▂▃▃▃▄▅▅▅▅▅▅▅▆▇▆▅▇▆▆▆█▇▇██▇▇▇███
val_f1_weighted,▁▁▁▂▂▃▃▃▄▅▅▅▅▅▅▅▆▇▆▅▇▆▆▆█▇▇██▇▇▇███
val_loss,▆▇█▄▆▄▄▄▄▃▂▃▃▂▃▂▂▂▂▃▂▂▂▃▁▁▁▁▁▂▁▂▁▁▁
val_precision_macro,▁▁▁▂▂▃▃▃▄▅▆▆▅▅▆▆▇▇▆▆▇▆▆▆█▇▇██▇▇▇██▇
val_recall_macro,▁▂▂▃▂▄▄▃▄▅▅▅▅▅▆▅▆▇▇▆▇▆▆▆█▇▇██▇▇▇███

0,1
best_val_acc,0.6325
epoch,35
lr,0.0024
train_acc,0.97833
train_loss,0.17839
val_acc,0.63
val_f1_macro,0.61419
val_f1_weighted,0.61419
val_loss,1.52899
val_precision_macro,0.64456


[Run 5] Epoch 01/40 | Train Acc: 0.025 | Val Acc: 0.060 | Train Loss: 3.9136 | Val Loss: 3.7168
[Run 5] Epoch 02/40 | Train Acc: 0.042 | Val Acc: 0.055 | Train Loss: 3.6964 | Val Loss: 3.5477
[Run 5] Epoch 02/40 | Train Acc: 0.042 | Val Acc: 0.055 | Train Loss: 3.6964 | Val Loss: 3.5477
[Run 5] Epoch 03/40 | Train Acc: 0.061 | Val Acc: 0.072 | Train Loss: 3.5663 | Val Loss: 3.4538
[Run 5] Epoch 03/40 | Train Acc: 0.061 | Val Acc: 0.072 | Train Loss: 3.5663 | Val Loss: 3.4538
[Run 5] Epoch 04/40 | Train Acc: 0.090 | Val Acc: 0.102 | Train Loss: 3.3985 | Val Loss: 3.2162
[Run 5] Epoch 04/40 | Train Acc: 0.090 | Val Acc: 0.102 | Train Loss: 3.3985 | Val Loss: 3.2162
[Run 5] Epoch 05/40 | Train Acc: 0.118 | Val Acc: 0.138 | Train Loss: 3.2274 | Val Loss: 3.2526
[Run 5] Epoch 05/40 | Train Acc: 0.118 | Val Acc: 0.138 | Train Loss: 3.2274 | Val Loss: 3.2526
[Run 5] Epoch 06/40 | Train Acc: 0.142 | Val Acc: 0.160 | Train Loss: 3.0723 | Val Loss: 3.1945
[Run 5] Epoch 06/40 | Train Acc: 0.142 |

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_acc,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▅▆▆▆▆▆▇▇▇▇▇▇▇▇█▇█████
train_loss,█▇▇▇▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_acc,▁▁▁▂▂▃▂▃▄▄▄▄▄▅▅▄▆▅▆▆▇▆▇▇▇▇██▇▇▇▇██████▇█
val_f1_macro,▁▁▁▂▂▂▂▂▃▄▄▄▄▅▅▄▆▅▅▅▆▅▇▇▇▇█▇▇▇▇▇██████▇▇
val_f1_weighted,▁▁▁▂▂▂▂▂▃▄▄▄▄▅▅▄▆▅▅▅▆▅▇▇▇▇█▇▇▇▇▇██████▇▇
val_loss,█▇▇▆▆▆▇▆▄▄▄▄▄▄▃▅▃▃▃▄▂▃▂▂▂▂▂▁▂▂▂▁▂▁▁▁▁▁▁▁
val_precision_macro,▁▁▁▂▃▃▂▂▄▄▄▄▄▅▆▅▇▆▆▅▇▅▇▇▇█▇▇▇▇▇▇██████▇█
val_recall_macro,▁▁▁▂▂▃▂▃▄▄▄▄▄▅▅▄▆▅▆▆▇▆▇▇▇▇██▇▇▇▇██████▇█

0,1
best_val_acc,0.5125
epoch,40
lr,0.00017
train_acc,0.64917
train_loss,1.28283
val_acc,0.49
val_f1_macro,0.45799
val_f1_weighted,0.45799
val_loss,1.78888
val_precision_macro,0.53778


# Evaluacion de los modelos