spectrogram saving

In [None]:
import os
import torch
import torchaudio
import pandas as pd
import torch.nn.functional as F

# ── Settings ──────────────────────────────────────────────
SOURCE_DIR = "Dataset_2025_augmented/Train"
OUTPUT_DIR = "mel_specs_augmented"
CSV_PATH = "mel_metadata_augmented.csv"
FIXED_LEN = 112000  # 7 seconds @ 16kHz

# ── Mel-spectrogram config ────────────────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

os.makedirs(OUTPUT_DIR, exist_ok=True)
metadata = []

# ── Loop through .wav files ───────────────────────────────
for i, fname in enumerate(sorted(os.listdir(SOURCE_DIR))):
    if not fname.endswith(".wav"):
        continue

    path = os.path.join(SOURCE_DIR, fname)
    waveform, sr = torchaudio.load(path)
    
    # print(f"\n📁 Processing file: {fname}")
    # print(f"🎧 Original sample rate: {sr}")
    # print(f"🔢 Original waveform shape: {waveform.shape}")

    # Pad or truncate to 7s
    if waveform.shape[1] < FIXED_LEN:
        waveform = F.pad(waveform, (0, FIXED_LEN - waveform.shape[1]))
        # print(f"📏 Padded to: {waveform.shape}")
    else:
        waveform = waveform[:, :FIXED_LEN]
        # print(f"📏 Truncated to: {waveform.shape}")

    # Compute spectrogram
    mel = mel_transform(waveform)
    spec = db_transform(mel)  # log scale
    # spec = spec.unsqueeze(0)  # → [1, 64, time]
    # print(f"📊 Mel shape: {mel.shape} → dB shape: {spec.shape}")

    # Save tensor
    save_name = fname.replace(".wav", ".pt")
    save_path = os.path.join(OUTPUT_DIR, save_name)
    torch.save(spec, save_path)
    # print(f"💾 Saved spectrogram to: {save_path}")

    # Extract label
    try:
        label = int(fname[0]) - 1
    except ValueError:
        raise ValueError(f"❌ Failed to extract label from filename: {fname}")
    # print(f"🏷️ Parsed label: {label}")

    metadata.append((save_name, label))

    if i % 100 == 0:
        print(f"✅ Processed {i} files")

# ── Save metadata CSV ─────────────────────────────────────
df = pd.DataFrame(metadata, columns=["path", "label"])
df.to_csv(CSV_PATH, index=False)
print(f"\n✅ All spectrograms saved to: {OUTPUT_DIR}")
print(f"📝 Metadata saved to: {CSV_PATH}")


✅ Processed 0 files
✅ Processed 100 files
✅ Processed 200 files
✅ Processed 300 files
✅ Processed 400 files
✅ Processed 500 files
✅ Processed 600 files
✅ Processed 700 files
✅ Processed 800 files
✅ Processed 900 files
✅ Processed 1000 files
✅ Processed 1100 files
✅ Processed 1200 files
✅ Processed 1300 files
✅ Processed 1400 files
✅ Processed 1500 files
✅ Processed 1600 files
✅ Processed 1700 files
✅ Processed 1800 files
✅ Processed 1900 files
✅ Processed 2000 files
✅ Processed 2100 files
✅ Processed 2200 files
✅ Processed 2300 files
✅ Processed 2400 files
✅ Processed 2500 files
✅ Processed 2600 files
✅ Processed 2700 files
✅ Processed 2800 files
✅ Processed 2900 files
✅ Processed 3000 files
✅ Processed 3100 files
✅ Processed 3200 files
✅ Processed 3300 files
✅ Processed 3400 files
✅ Processed 3500 files
✅ Processed 3600 files
✅ Processed 3700 files
✅ Processed 3800 files
✅ Processed 3900 files
✅ Processed 4000 files
✅ Processed 4100 files
✅ Processed 4200 files
✅ Processed 4300 files


SpectrogramCNN

In [1]:
import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.query = nn.Conv1d(in_channels, in_channels, kernel_size=1)
        self.key   = nn.Conv1d(in_channels, in_channels, kernel_size=1)
        self.value = nn.Conv1d(in_channels, in_channels, kernel_size=1)
        self.scale = in_channels ** 0.5

    def forward(self, x):
        # x shape: [B, C, F, T] → squeeze F
        B, C, F, T = x.shape
        x = x.view(B, C * F, T)  # Flatten frequency into channels

        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        attn = torch.bmm(Q.transpose(1, 2), K) / self.scale
        attn = torch.softmax(attn, dim=-1)
        out = torch.bmm(attn, V.transpose(1, 2)).transpose(1, 2)

        out = out.view(B, C, F, T)  # Reshape back
        return out



class SpectrogramCNN(nn.Module):
    def __init__(self, num_classes=5):
        super().__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
        )

        # Attention block expects input of shape [B, 1024, T]
        self.attn = SelfAttention(in_channels=256 * 4)

        self.pool = nn.AdaptiveAvgPool2d((4, 4))  # Makes shape consistent before FC

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_block(x)      # [B, 256, 4, T]
        # x = self.attn(x)            # [B, 256, 4, T] → enhanced via attention
        x = self.pool(x)            # [B, 256, 4, 4]
        x = self.classifier(x)      # [B, num_classes]
        return x


In [10]:
import torch
import torch.nn.functional as F

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = output.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)

    acc = correct / total
    avg_loss = running_loss / len(dataloader)
    return avg_loss, acc


def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss = criterion(output, y)

            total_loss += loss.item()
            preds = output.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    acc = correct / total
    avg_loss = total_loss / len(dataloader)
    return avg_loss, acc


In [None]:
def train_model(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    device,
    max_epochs=30,
    patience=5,
    save_path="best_model.pt"
):
    best_val_acc = 0.0
    counter = 0

    for epoch in range(max_epochs):
        print(f"\n🌟 Epoch {epoch+1}/{max_epochs}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)

        # 📊 Logging
        print(f"📈 Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"📊 Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")

        # 💾 Checkpoint
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), save_path)
            print(f"💾 Best model saved! (Val Acc: {val_acc:.4f})")
            counter = 0
        else:
            counter += 1
            print(f"⏳ No improvement. Patience counter: {counter}/{patience}")

        # 🛑 Early stopping
        if counter >= patience:
            print("🛑 Early stopping triggered.")
            break

    print(f"\n✅ Training complete. Best Val Accuracy: {best_val_acc:.4f}")


split

In [17]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch
import os
from sklearn.model_selection import train_test_split

CSV_PATH = "mel_metadata_augmented.csv"
SPEC_DIR = "mel_specs_augmented"

# ── Load and split metadata ───────────────────────────────
df = pd.read_csv(CSV_PATH)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

# ── Dataset class for loading .pt spectrograms ─────────────
class PrecomputedSpectrogramDataset(Dataset):
    def __init__(self, dataframe, spec_dir):
        self.df = dataframe.reset_index(drop=True)
        self.spec_dir = spec_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        spec_path = os.path.join(self.spec_dir, row['path'])
        spectrogram = torch.load(spec_path)  # ← Do NOT squeeze here
        label = row['label']
        return spectrogram, label

# ── Create datasets and loaders ────────────────────────────
train_dataset = PrecomputedSpectrogramDataset(train_df, spec_dir=SPEC_DIR)
val_dataset   = PrecomputedSpectrogramDataset(val_df,   spec_dir=SPEC_DIR)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=16)

# ── Sanity check on data shape ────────────────────────────
sample_x, sample_y = next(iter(train_loader))
print(f"🔍 Sample batch shape: {sample_x.shape}")  # Expected: [32, 1, 64, T]
print(f"🧷 Sample labels: {sample_y[:5].tolist()}")

model = SpectrogramCNN(num_classes=5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=3e-6)


🔍 Sample batch shape: torch.Size([32, 1, 64, 438])
🧷 Sample labels: [4, 1, 3, 4, 1]


In [20]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    max_epochs=100,
    patience=16,
    save_path="best_model_b.pt"
)



🌟 Epoch 1/100
📈 Train Loss: 0.0479 | Train Acc: 0.9888
📊 Val Loss:   0.0193 | Val Acc:   0.9968
💾 Best model saved! (Val Acc: 0.9968)

🌟 Epoch 2/100
📈 Train Loss: 0.0387 | Train Acc: 0.9900
📊 Val Loss:   0.0103 | Val Acc:   0.9963
⏳ No improvement. Patience counter: 1/16

🌟 Epoch 3/100
📈 Train Loss: 0.0365 | Train Acc: 0.9918
📊 Val Loss:   0.0157 | Val Acc:   0.9963
⏳ No improvement. Patience counter: 2/16

🌟 Epoch 4/100
📈 Train Loss: 0.0101 | Train Acc: 0.9970
📊 Val Loss:   0.0043 | Val Acc:   0.9979
💾 Best model saved! (Val Acc: 0.9979)

🌟 Epoch 5/100
📈 Train Loss: 0.0118 | Train Acc: 0.9971
📊 Val Loss:   0.0102 | Val Acc:   0.9984
💾 Best model saved! (Val Acc: 0.9984)

🌟 Epoch 6/100
📈 Train Loss: 0.0109 | Train Acc: 0.9970
📊 Val Loss:   0.0138 | Val Acc:   0.9968
⏳ No improvement. Patience counter: 1/16

🌟 Epoch 7/100
📈 Train Loss: 0.0135 | Train Acc: 0.9961
📊 Val Loss:   0.0353 | Val Acc:   0.9953
⏳ No improvement. Patience counter: 2/16

🌟 Epoch 8/100
📈 Train Loss: 0.0208 | Train

In [19]:
import os
import torch
import torchaudio
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# ── Config ────────────────────────────────
TEST_WAV_DIR = "Test set"
MODEL_PATH = "best_model_b.pt"
OUTPUT_CSV = "submission_b.csv"
FIXED_LEN = 112000  # 7 seconds at 16kHz

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Test Dataset (on-the-fly spectrogram) ─────────────────
class TestWavDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)

        # Pad or truncate to 7s
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        # Convert to log-mel spectrogram
        mel = mel_transform(waveform)
        mel_db = db_transform(mel)
        # mel_db = mel_db.unsqueeze(0)  # shape: [1, 64, T]

        file_id = os.path.splitext(fname)[0]
        return mel_db, file_id

# ── Run prediction and save submission ─────
def run_test():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)
    model.eval()

    dataset = TestWavDataset(TEST_WAV_DIR)
    loader = DataLoader(dataset, batch_size=32)

    predictions = []

    with torch.no_grad():
        for batch, ids in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1  # [0–4] → [1–5]
            predictions.extend(zip(ids, preds.cpu().numpy()))

    df = pd.DataFrame(predictions, columns=["Id", "label"])
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"✅ Saved predictions to {OUTPUT_CSV}")

# ── Entry Point ───────────────────────────
if __name__ == "__main__":
    run_test()


✅ Saved predictions to submission_b.csv


In [4]:
import os
import torch
import torchaudio
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# ── Config ────────────────────────────────
DATA_DIR = "Dataset_2025/Train"
MODEL_PATH = "best_model_b.pt"
RESULT_DIR = "results_modelB"
FIXED_LEN = 112000  # 7 seconds at 16kHz
os.makedirs(RESULT_DIR, exist_ok=True)

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Dataset ───────────────────────────────
class SpectrogramDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        mel = mel_transform(waveform)
        mel_db = db_transform(mel)

        file_id = os.path.splitext(fname)[0]
        group, sex = file_id[0], file_id[1]
        true_label = int(group)
        return mel_db, file_id, true_label, group, sex

# ── Evaluation ────────────────────────────
def evaluate(model, loader, device):
    model.eval()
    all_preds, all_trues, meta = [], [], []

    with torch.no_grad():
        for batch, ids, labels, groups, sexes in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1
            all_preds.extend(preds.cpu().numpy())
            all_trues.extend(labels.numpy())
            meta.extend(zip(ids, groups, sexes))

    return all_preds, all_trues, meta

# ── Plots ─────────────────────────────────
def plot_group_sex_accuracy(df):
    df["group_sex"] = df["group"] + df["sex"]

    combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(8, 5))
    sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
    plt.title("🔍 Accuracy by Group and Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
    plt.close()

    group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(6, 4))
    sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
    plt.title("🎯 Accuracy by Group")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
    plt.close()

    sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(5, 4))
    sns.barplot(data=sex_acc, x="sex", y="accuracy", palette="Set2")
    plt.title("🧑‍🤝‍🧑 Accuracy by Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
    plt.close()

def plot_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred, labels=[1,2,3,4,5])
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[1,2,3,4,5], yticklabels=[1,2,3,4,5])
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "confusion_matrix_modelB.png"))
    plt.close()

# ── Main ─────────────────────────────────
def run_full_evaluation():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)

    dataset = SpectrogramDataset(DATA_DIR)
    loader = DataLoader(dataset, batch_size=32)

    preds, trues, meta = evaluate(model, loader, device)

    df = pd.DataFrame(meta, columns=["Id", "group", "sex"])
    df["label"] = preds
    df["true"] = trues
    df.to_csv(os.path.join(RESULT_DIR, "detailed_predictions_modelB.csv"), index=False)
    print(f"✅ Saved predictions to {os.path.join(RESULT_DIR, 'detailed_predictions_modelB.csv')}")

    plot_group_sex_accuracy(df)
    plot_confusion_matrix(df["true"], df["label"])
    print(f"📊 All results saved in {RESULT_DIR}")

# ── Run ──────────────────────────────────
if __name__ == "__main__":
    run_full_evaluation()


✅ Saved predictions to results_modelB\detailed_predictions_modelB.csv


  combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
  group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
  sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Pa

📊 All results saved in results_modelB


model b2: this was to see if lowering the learning rate would help the model acheive 1 R2

In [22]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)

train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    max_epochs=100,
    patience=16,
    save_path="best_model_b2.pt"
)



🌟 Epoch 1/100
📈 Train Loss: 0.0235 | Train Acc: 0.9943
📊 Val Loss:   0.1094 | Val Acc:   0.9837
💾 Best model saved! (Val Acc: 0.9837)

🌟 Epoch 2/100
📈 Train Loss: 0.0429 | Train Acc: 0.9917
📊 Val Loss:   0.0240 | Val Acc:   0.9958
💾 Best model saved! (Val Acc: 0.9958)

🌟 Epoch 3/100
📈 Train Loss: 0.0081 | Train Acc: 0.9982
📊 Val Loss:   0.0179 | Val Acc:   0.9979
💾 Best model saved! (Val Acc: 0.9979)

🌟 Epoch 4/100
📈 Train Loss: 0.0284 | Train Acc: 0.9947
📊 Val Loss:   0.0397 | Val Acc:   0.9905
⏳ No improvement. Patience counter: 1/16

🌟 Epoch 5/100
📈 Train Loss: 0.0125 | Train Acc: 0.9971
📊 Val Loss:   0.0923 | Val Acc:   0.9905
⏳ No improvement. Patience counter: 2/16

🌟 Epoch 6/100
📈 Train Loss: 0.0414 | Train Acc: 0.9926
📊 Val Loss:   0.0116 | Val Acc:   0.9974
⏳ No improvement. Patience counter: 3/16

🌟 Epoch 7/100
📈 Train Loss: 0.0084 | Train Acc: 0.9980
📊 Val Loss:   0.0256 | Val Acc:   0.9958
⏳ No improvement. Patience counter: 4/16

🌟 Epoch 8/100
📈 Train Loss: 0.0362 | Train

In [23]:
import os
import torch
import torchaudio
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# ── Config ────────────────────────────────
TEST_WAV_DIR = "Test set"
MODEL_PATH = "best_model_b2.pt"
OUTPUT_CSV = "submission_b2.csv"
FIXED_LEN = 112000  # 7 seconds at 16kHz

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Test Dataset (on-the-fly spectrogram) ─────────────────
class TestWavDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)

        # Pad or truncate to 7s
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        # Convert to log-mel spectrogram
        mel = mel_transform(waveform)
        mel_db = db_transform(mel)
        # mel_db = mel_db.unsqueeze(0)  # shape: [1, 64, T]

        file_id = os.path.splitext(fname)[0]
        return mel_db, file_id

# ── Run prediction and save submission ─────
def run_test():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)
    model.eval()

    dataset = TestWavDataset(TEST_WAV_DIR)
    loader = DataLoader(dataset, batch_size=32)

    predictions = []

    with torch.no_grad():
        for batch, ids in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1  # [0–4] → [1–5]
            predictions.extend(zip(ids, preds.cpu().numpy()))

    df = pd.DataFrame(predictions, columns=["Id", "label"])
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"✅ Saved predictions to {OUTPUT_CSV}")

# ── Entry Point ───────────────────────────
if __name__ == "__main__":
    run_test()


✅ Saved predictions to submission_b2.csv


In [3]:
import os
import torch
import torchaudio
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# ── Config ────────────────────────────────
DATA_DIR = "Dataset_2025/Train"
MODEL_PATH = "best_model_b2.pt"
RESULT_DIR = "results_modelB2"
FIXED_LEN = 112000  # 7 seconds at 16kHz
os.makedirs(RESULT_DIR, exist_ok=True)

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Dataset ───────────────────────────────
class SpectrogramDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        mel = mel_transform(waveform)
        mel_db = db_transform(mel)

        file_id = os.path.splitext(fname)[0]
        group, sex = file_id[0], file_id[1]
        true_label = int(group)
        return mel_db, file_id, true_label, group, sex

# ── Evaluation ────────────────────────────
def evaluate(model, loader, device):
    model.eval()
    all_preds, all_trues, meta = [], [], []

    with torch.no_grad():
        for batch, ids, labels, groups, sexes in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1
            all_preds.extend(preds.cpu().numpy())
            all_trues.extend(labels.numpy())
            meta.extend(zip(ids, groups, sexes))

    return all_preds, all_trues, meta

# ── Plots ─────────────────────────────────
def plot_group_sex_accuracy(df):
    df["group_sex"] = df["group"] + df["sex"]

    combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(8, 5))
    sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
    plt.title("🔍 Accuracy by Group and Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
    plt.close()

    group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(6, 4))
    sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
    plt.title("🎯 Accuracy by Group")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
    plt.close()

    sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(5, 4))
    sns.barplot(data=sex_acc, x="sex", y="accuracy", palette="Set2")
    plt.title("🧑‍🤝‍🧑 Accuracy by Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
    plt.close()

def plot_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred, labels=[1,2,3,4,5])
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[1,2,3,4,5], yticklabels=[1,2,3,4,5])
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "confusion_matrix_modelB.png"))
    plt.close()

# ── Main ─────────────────────────────────
def run_full_evaluation():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)

    dataset = SpectrogramDataset(DATA_DIR)
    loader = DataLoader(dataset, batch_size=32)

    preds, trues, meta = evaluate(model, loader, device)

    df = pd.DataFrame(meta, columns=["Id", "group", "sex"])
    df["label"] = preds
    df["true"] = trues
    df.to_csv(os.path.join(RESULT_DIR, "detailed_predictions_modelB.csv"), index=False)
    print(f"✅ Saved predictions to {os.path.join(RESULT_DIR, 'detailed_predictions_modelB.csv')}")

    plot_group_sex_accuracy(df)
    plot_confusion_matrix(df["true"], df["label"])
    print(f"📊 All results saved in {RESULT_DIR}")

# ── Run ──────────────────────────────────
if __name__ == "__main__":
    run_full_evaluation()


✅ Saved predictions to results_modelB2\detailed_predictions_modelB.csv


  combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
  group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
  sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Pa

📊 All results saved in results_modelB2


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))


model b3: this model is to check if the base data can outperform the augumented data or not

In [24]:
# ── Settings ──────────────────────────────────────────────
SOURCE_DIR = "Dataset_2025/Train"
OUTPUT_DIR = "mel_specs"
CSV_PATH = "mel_metadata.csv"
FIXED_LEN = 112000  # 7 seconds @ 16kHz

# ── Mel-spectrogram config ────────────────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

os.makedirs(OUTPUT_DIR, exist_ok=True)
metadata = []

# ── Loop through .wav files ───────────────────────────────
for i, fname in enumerate(sorted(os.listdir(SOURCE_DIR))):
    if not fname.endswith(".wav"):
        continue

    path = os.path.join(SOURCE_DIR, fname)
    waveform, sr = torchaudio.load(path)
    
    # print(f"\n📁 Processing file: {fname}")
    # print(f"🎧 Original sample rate: {sr}")
    # print(f"🔢 Original waveform shape: {waveform.shape}")

    # Pad or truncate to 7s
    if waveform.shape[1] < FIXED_LEN:
        waveform = F.pad(waveform, (0, FIXED_LEN - waveform.shape[1]))
        # print(f"📏 Padded to: {waveform.shape}")
    else:
        waveform = waveform[:, :FIXED_LEN]
        # print(f"📏 Truncated to: {waveform.shape}")

    # Compute spectrogram
    mel = mel_transform(waveform)
    spec = db_transform(mel)  # log scale
    # spec = spec.unsqueeze(0)  # → [1, 64, time]
    # print(f"📊 Mel shape: {mel.shape} → dB shape: {spec.shape}")

    # Save tensor
    save_name = fname.replace(".wav", ".pt")
    save_path = os.path.join(OUTPUT_DIR, save_name)
    torch.save(spec, save_path)
    # print(f"💾 Saved spectrogram to: {save_path}")

    # Extract label
    try:
        label = int(fname[0]) - 1
    except ValueError:
        raise ValueError(f"❌ Failed to extract label from filename: {fname}")
    # print(f"🏷️ Parsed label: {label}")

    metadata.append((save_name, label))

    if i % 100 == 0:
        print(f"✅ Processed {i} files")

# ── Save metadata CSV ─────────────────────────────────────
df = pd.DataFrame(metadata, columns=["path", "label"])
df.to_csv(CSV_PATH, index=False)
print(f"\n✅ All spectrograms saved to: {OUTPUT_DIR}")
print(f"📝 Metadata saved to: {CSV_PATH}")


✅ Processed 0 files
✅ Processed 100 files
✅ Processed 200 files
✅ Processed 300 files
✅ Processed 400 files
✅ Processed 500 files
✅ Processed 600 files
✅ Processed 700 files
✅ Processed 800 files
✅ Processed 900 files
✅ Processed 1000 files
✅ Processed 1100 files
✅ Processed 1200 files
✅ Processed 1300 files
✅ Processed 1400 files
✅ Processed 1500 files
✅ Processed 1600 files
✅ Processed 1700 files
✅ Processed 1800 files
✅ Processed 1900 files
✅ Processed 2000 files
✅ Processed 2100 files
✅ Processed 2200 files
✅ Processed 2300 files
✅ Processed 2400 files
✅ Processed 2500 files
✅ Processed 2600 files
✅ Processed 2700 files
✅ Processed 2800 files
✅ Processed 2900 files
✅ Processed 3000 files
✅ Processed 3100 files

✅ All spectrograms saved to: mel_specs
📝 Metadata saved to: mel_metadata.csv


split

In [None]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch
import os
from sklearn.model_selection import train_test_split

CSV_PATH = "mel_metadata.csv"
SPEC_DIR = "mel_specs"

# ── Load and split metadata ───────────────────────────────
df = pd.read_csv(CSV_PATH)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

# ── Dataset class for loading .pt spectrograms ─────────────
class PrecomputedSpectrogramDataset(Dataset):
    def __init__(self, dataframe, spec_dir):
        self.df = dataframe.reset_index(drop=True)
        self.spec_dir = spec_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        spec_path = os.path.join(self.spec_dir, row['path'])
        spectrogram = torch.load(spec_path)  # ← Do NOT squeeze here
        label = row['label']
        return spectrogram, label

# ── Create datasets and loaders ────────────────────────────
train_dataset = PrecomputedSpectrogramDataset(train_df, spec_dir=SPEC_DIR)
val_dataset   = PrecomputedSpectrogramDataset(val_df,   spec_dir=SPEC_DIR)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=16)

# ── Sanity check on data shape ────────────────────────────
sample_x, sample_y = next(iter(train_loader))

model = SpectrogramCNN(num_classes=5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=2e-6)


In [26]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    max_epochs=100,
    patience=16,
    save_path="best_model_b3.pt"
)



🌟 Epoch 1/100
📈 Train Loss: 1.7240 | Train Acc: 0.2500
📊 Val Loss:   1.5354 | Val Acc:   0.2524
💾 Best model saved! (Val Acc: 0.2524)

🌟 Epoch 2/100
📈 Train Loss: 1.1909 | Train Acc: 0.4889
📊 Val Loss:   0.9329 | Val Acc:   0.5994
💾 Best model saved! (Val Acc: 0.5994)

🌟 Epoch 3/100
📈 Train Loss: 0.9289 | Train Acc: 0.6122
📊 Val Loss:   0.7615 | Val Acc:   0.6719
💾 Best model saved! (Val Acc: 0.6719)

🌟 Epoch 4/100
📈 Train Loss: 0.6559 | Train Acc: 0.7504
📊 Val Loss:   0.5218 | Val Acc:   0.7886
💾 Best model saved! (Val Acc: 0.7886)

🌟 Epoch 5/100
📈 Train Loss: 0.4806 | Train Acc: 0.8144
📊 Val Loss:   0.3878 | Val Acc:   0.8549
💾 Best model saved! (Val Acc: 0.8549)

🌟 Epoch 6/100
📈 Train Loss: 0.4111 | Train Acc: 0.8523
📊 Val Loss:   0.6025 | Val Acc:   0.7997
⏳ No improvement. Patience counter: 1/16

🌟 Epoch 7/100
📈 Train Loss: 0.3758 | Train Acc: 0.8582
📊 Val Loss:   0.4834 | Val Acc:   0.8628
💾 Best model saved! (Val Acc: 0.8628)

🌟 Epoch 8/100
📈 Train Loss: 0.3475 | Train Acc: 0.8

In [27]:

# ── Config ────────────────────────────────
TEST_WAV_DIR = "Test set"
MODEL_PATH = "best_model_b3.pt"
OUTPUT_CSV = "submission_b3.csv"
FIXED_LEN = 112000  # 7 seconds at 16kHz

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Test Dataset (on-the-fly spectrogram) ─────────────────
class TestWavDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)

        # Pad or truncate to 7s
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        # Convert to log-mel spectrogram
        mel = mel_transform(waveform)
        mel_db = db_transform(mel)
        # mel_db = mel_db.unsqueeze(0)  # shape: [1, 64, T]

        file_id = os.path.splitext(fname)[0]
        return mel_db, file_id

# ── Run prediction and save submission ─────
def run_test():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)
    model.eval()

    dataset = TestWavDataset(TEST_WAV_DIR)
    loader = DataLoader(dataset, batch_size=32)

    predictions = []

    with torch.no_grad():
        for batch, ids in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1  # [0–4] → [1–5]
            predictions.extend(zip(ids, preds.cpu().numpy()))

    df = pd.DataFrame(predictions, columns=["Id", "label"])
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"✅ Saved predictions to {OUTPUT_CSV}")

# ── Entry Point ───────────────────────────
if __name__ == "__main__":
    run_test()


✅ Saved predictions to submission_b3.csv


In [5]:
import os
import torch
import torchaudio
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# ── Config ────────────────────────────────
DATA_DIR = "Dataset_2025/Train"
MODEL_PATH = "best_model_b3.pt"
RESULT_DIR = "results_modelB3"
FIXED_LEN = 112000  # 7 seconds at 16kHz
os.makedirs(RESULT_DIR, exist_ok=True)

# ── Log-Mel Transform ─────────────────────
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,
    n_fft=1024,
    hop_length=256,
    n_mels=64
)
db_transform = torchaudio.transforms.AmplitudeToDB()

# ── Dataset ───────────────────────────────
class SpectrogramDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.files = sorted([f for f in os.listdir(folder_path) if f.endswith(".wav")])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        path = os.path.join(self.folder_path, fname)

        waveform, sr = torchaudio.load(path)
        if waveform.shape[1] < FIXED_LEN:
            pad_len = FIXED_LEN - waveform.shape[1]
            waveform = F.pad(waveform, (0, pad_len))
        else:
            waveform = waveform[:, :FIXED_LEN]

        mel = mel_transform(waveform)
        mel_db = db_transform(mel)

        file_id = os.path.splitext(fname)[0]
        group, sex = file_id[0], file_id[1]
        true_label = int(group)
        return mel_db, file_id, true_label, group, sex

# ── Evaluation ────────────────────────────
def evaluate(model, loader, device):
    model.eval()
    all_preds, all_trues, meta = [], [], []

    with torch.no_grad():
        for batch, ids, labels, groups, sexes in loader:
            batch = batch.to(device)
            outputs = model(batch)
            preds = torch.argmax(outputs, dim=1) + 1
            all_preds.extend(preds.cpu().numpy())
            all_trues.extend(labels.numpy())
            meta.extend(zip(ids, groups, sexes))

    return all_preds, all_trues, meta

# ── Plots ─────────────────────────────────
def plot_group_sex_accuracy(df):
    df["group_sex"] = df["group"] + df["sex"]

    combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(8, 5))
    sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
    plt.title("🔍 Accuracy by Group and Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
    plt.close()

    group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(6, 4))
    sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
    plt.title("🎯 Accuracy by Group")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
    plt.close()

    sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")
    plt.figure(figsize=(5, 4))
    sns.barplot(data=sex_acc, x="sex", y="accuracy", palette="Set2")
    plt.title("🧑‍🤝‍🧑 Accuracy by Sex")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
    plt.close()

def plot_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred, labels=[1,2,3,4,5])
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[1,2,3,4,5], yticklabels=[1,2,3,4,5])
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(RESULT_DIR, "confusion_matrix_modelB.png"))
    plt.close()

# ── Main ─────────────────────────────────
def run_full_evaluation():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = SpectrogramCNN(num_classes=5)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)

    dataset = SpectrogramDataset(DATA_DIR)
    loader = DataLoader(dataset, batch_size=32)

    preds, trues, meta = evaluate(model, loader, device)

    df = pd.DataFrame(meta, columns=["Id", "group", "sex"])
    df["label"] = preds
    df["true"] = trues
    df.to_csv(os.path.join(RESULT_DIR, "detailed_predictions_modelB.csv"), index=False)
    print(f"✅ Saved predictions to {os.path.join(RESULT_DIR, 'detailed_predictions_modelB.csv')}")

    plot_group_sex_accuracy(df)
    plot_confusion_matrix(df["true"], df["label"])
    print(f"📊 All results saved in {RESULT_DIR}")

# ── Run ──────────────────────────────────
if __name__ == "__main__":
    run_full_evaluation()


✅ Saved predictions to results_modelB3\detailed_predictions_modelB.csv


  combo_acc = df.groupby("group_sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=combo_acc, x="group_sex", y="accuracy", palette="viridis")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_sex_modelB.png"))
  group_acc = df.groupby("group").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=group_acc, x="group", y="accuracy", palette="Blues_d")
  plt.tight_layout()
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_group_modelB.png"))
  sex_acc = df.groupby("sex").apply(lambda x: (x.label == x.true).mean()).reset_index(name="accuracy")

Pa

📊 All results saved in results_modelB3


  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
  plt.savefig(os.path.join(RESULT_DIR, "accuracy_by_sex_modelB.png"))
