In [7]:
# data loading
from sklearn.preprocessing import LabelEncoder
import os
import torch
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_dataset(base_dir):
    """Load every ``.pt`` tensor under ``base_dir/<genre>/`` as one flattened feature row.

    Each immediate sub-directory of ``base_dir`` is treated as one class and its
    directory name is used as the label of every ``.pt`` file found inside it.
    Entries of ``base_dir`` that are not directories, and files that do not end
    in ``.pt``, are ignored.

    Args:
        base_dir: Directory whose sub-directories each hold the ``.pt`` files
            of one genre.

    Returns:
        ``(features, labels)``: ``features`` is ``np.array`` of the flattened
        tensors (one entry per file) and ``labels`` is an ``np.array`` of the
        matching directory names, in the same order.
    """
    features = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split built on top of it) reproducible.
    for genre in sorted(os.listdir(base_dir)):
        genre_dir = os.path.join(base_dir, genre)
        if not os.path.isdir(genre_dir):
            continue
        for fname in sorted(os.listdir(genre_dir)):
            if not fname.endswith('.pt'):
                continue
            path = os.path.join(genre_dir, fname)
            # map_location="cpu": a tensor saved from a GPU would otherwise be
            # restored on CUDA, where .numpy() raises. detach() covers tensors
            # that were saved with requires_grad=True.
            tensor = torch.load(path, map_location="cpu").detach().flatten().numpy()
            features.append(tensor)
            labels.append(genre)
    return np.array(features), np.array(labels)

# Root directory of the dataset; it must contain "train" and "val" sub-directories,
# each with one folder per genre (see load_dataset). The commented-out line is the
# alternative, non-augmented dataset location.
#file_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/train_set/processed_spectrograms_v2"
file_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/train_set_augmented/Multiple files/augmented_set"

# Feature matrices and string labels for both splits (module-level globals that
# later cells read).
X_train, y_train = load_dataset(file_dir + "/train")
X_val, y_val = load_dataset(file_dir + "/val")

# Encode labels: the encoder is fitted on the training labels only and then
# reused for validation, so both splits share the same integer <-> genre mapping.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_val = le.transform(y_val)

In [8]:
# Standardize the features using statistics of the training split only.
# NOTE: this cell rebinds X_train / X_val to their normalized versions, so it is
# not idempotent -- run it exactly once after the loading cell. Running it a
# second time would normalize data that is already normalized.
# Calculate normalization parameters from training data
# (.mean()/.std() without an axis reduce over every element, giving one scalar each)
train_mean = X_train.mean()
train_std = X_train.std()

# Apply normalization (validation data uses the *training* statistics)
X_train = (X_train - train_mean) / train_std
X_val = (X_val - train_mean) / train_std

# Store for test data normalization
normalization_params = {'mean': train_mean, 'std': train_std}

In [9]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

def get_data_loader(
    target_classes,
    batch_size=64,
    random_seed=1000,
    test_per_class=35
):
    """
    Build train / validation / test DataLoaders from the notebook-level arrays.

    Reads the globals ``X_train``, ``y_train``, ``X_val``, ``y_val`` (features
    and integer-encoded labels) and ``le`` (the fitted label encoder). The test
    set is carved out of the VALIDATION data: for every class known to ``le``,
    up to ``test_per_class`` randomly chosen validation samples are moved to
    the test set and the rest stay in the validation set.

    Args:
        target_classes: Accepted for interface compatibility; it is not used
            for filtering -- every class in ``le.classes_`` is included.
        batch_size: samples per batch
        random_seed: seeds both ``torch`` and the global NumPy RNG so the
            split is reproducible
        test_per_class: maximum number of validation samples per class moved
            to the test set (default 35)

    Returns:
        train_loader, val_loader, test_loader, classes
        ``val_loader`` is ``None`` when no validation samples remain after the
        test set has been taken out.

    Raises:
        ValueError: if ``test_per_class`` is negative.
    """

    # Only read here; the declaration documents the hidden inputs of this function.
    global X_train, X_val, y_train, y_val, le

    if test_per_class < 0:
        raise ValueError("test_per_class must be non-negative")

    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    # Convert to PyTorch tensors (labels are already encoded)
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.LongTensor(y_train)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.LongTensor(y_val)

    # Reshape to (N, 1, 128, 128)
    X_train_tensor = X_train_tensor.view(-1, 1, 128, 128)
    X_val_tensor = X_val_tensor.view(-1, 1, 128, 128)

    # ===== Create TEST SET: random songs from each class in VALIDATION data =====
    test_indices = []
    remaining_val_indices = []

    # Derive the class count from the encoder instead of hard-coding 12, so the
    # loop neither skips classes nor indexes past le.classes_.
    num_classes = len(le.classes_)

    for class_idx in range(num_classes):
        # Find all samples of this class in VALIDATION data
        class_indices = np.where(y_val == class_idx)[0]

        # Shuffle the indices for this class
        shuffled_class_indices = np.random.permutation(class_indices)

        # Take the first samples for test (or all of them if the class is small)
        n_test_samples = min(test_per_class, len(shuffled_class_indices))
        test_class_indices = shuffled_class_indices[:n_test_samples]
        remaining_class_indices = shuffled_class_indices[n_test_samples:]

        test_indices.extend(test_class_indices.tolist())
        remaining_val_indices.extend(remaining_class_indices.tolist())

        print(f"Class {le.classes_[class_idx]}: {len(class_indices)} total, {n_test_samples} for test (random), {len(remaining_class_indices)} remaining for val")

    # Create test set from selected validation samples
    X_test_tensor = X_val_tensor[test_indices]
    y_test_tensor = y_val_tensor[test_indices]

    # Create new validation set with remaining samples (if any)
    if remaining_val_indices:
        X_val_reduced = X_val_tensor[remaining_val_indices]
        y_val_reduced = y_val_tensor[remaining_val_indices]
    else:
        # If no samples remain, create empty validation set
        X_val_reduced = torch.empty(0, 1, 128, 128)
        y_val_reduced = torch.empty(0, dtype=torch.long)

    # Create datasets
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_reduced, y_val_reduced)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) if len(val_dataset) > 0 else None
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    classes = le.classes_.tolist()

    print(f"\nLoaded {len(train_dataset)} training, {len(val_dataset)} validation, {len(test_dataset)} test")
    # Plain ASCII "x": the original multiplication sign had been mis-encoded in the source.
    print(f"Test set: {test_per_class} random samples per class x {num_classes} classes = {len(test_dataset)} total")
    print(f"Classes: {classes}")

    return train_loader, val_loader, test_loader, classes

In [10]:
'''import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

def get_data_loader(
    target_classes,
    batch_size=64,
    random_seed=1000
):
    """
    Adapted for music genre classification using pre-processed .pt files

    Args:
        target_classes: List of genre classes to include (or None for all)
        batch_size: samples per batch
        random_seed: for reproducible results

    Returns:
        train_loader, val_loader, test_loader, classes
    """

    global X_train, X_val, y_train, y_val, le

    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    # Convert to PyTorch tensors (labels are already encoded)
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.LongTensor(y_train)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.LongTensor(y_val)

    # Reshape to (N, 1, 128, 128)
    X_train_tensor = X_train_tensor.view(-1, 1, 128, 128)
    X_val_tensor = X_val_tensor.view(-1, 1, 128, 128)

    # ===== Create TEST SET from first 35 frames of each training sample =====
    # First 35 frames along last dimension (time)
    X_test_tensor = X_train_tensor[:, :, :, :35]
    y_test_tensor = y_train_tensor.clone()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    classes = le.classes_.tolist()

    print(f"Loaded {len(train_dataset)} training, {len(val_dataset)} validation, {len(test_dataset)} test")
    print(f"Classes: {classes}")

    return train_loader, val_loader, test_loader, classes'''

'import numpy as np\nimport torch\nfrom torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler\n\ndef get_data_loader(\n    target_classes,\n    batch_size=64,\n    random_seed=1000\n):\n    """\n    Adapted for music genre classification using pre-processed .pt files\n\n    Args:\n        target_classes: List of genre classes to include (or None for all)\n        batch_size: samples per batch\n        random_seed: for reproducible results\n\n    Returns:\n        train_loader, val_loader, test_loader, classes\n    """\n\n    global X_train, X_val, y_train, y_val, le\n\n    torch.manual_seed(random_seed)\n    np.random.seed(random_seed)\n\n    # Convert to PyTorch tensors (labels are already encoded)\n    X_train_tensor = torch.FloatTensor(X_train)\n    y_train_tensor = torch.LongTensor(y_train)\n    X_val_tensor = torch.FloatTensor(X_val)\n    y_val_tensor = torch.LongTensor(y_val)\n\n    # Reshape to (N, 1, 128, 128)\n    X_train_tensor = X_train_tensor.view(-1, 1, 12

In [11]:
def evaluate_multiclass(model, dataloader, criterion):
    """Compute the classification error rate and mean batch loss of ``model``.

    Args:
        model: Network returning one score per class for each input row.
        dataloader: Iterable of ``(inputs, labels)`` batches that supports
            ``len()``. May be ``None`` or empty (``get_data_loader`` returns
            ``None`` when no validation samples remain).
        criterion: Loss called as ``criterion(outputs, labels)``.

    Returns:
        ``(avg_err, avg_loss)``: fraction of misclassified samples, and the
        per-batch losses averaged over the number of batches. Both are NaN
        when there is nothing to evaluate.
    """
    # Nothing to evaluate: report NaN instead of crashing on None / dividing by zero.
    if dataloader is None or len(dataloader) == 0:
        return float("nan"), float("nan")

    # Run on whatever device the model lives on. The notebook-level `device`
    # is rebound in a later cell, so it is only used as a fallback for models
    # that have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.eval()
    total_loss = 0.0
    total_err = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, dim=1)
            total_err += (preds != labels).sum().item()
            total_loss += loss.item()
            total_samples += labels.size(0)

    # Batches that are all empty would leave total_samples at 0.
    avg_err = total_err / total_samples if total_samples else float("nan")
    avg_loss = total_loss / len(dataloader)
    return avg_err, avg_loss

from sklearn.metrics import classification_report

def evaluate_per_class(model, dataloader, le, criterion):
    """Print a per-class precision / recall / F1 report for ``model``.

    Args:
        model: Network returning one score per class for each input row.
        dataloader: Iterable of ``(inputs, labels)`` batches that supports
            ``len()``. ``None`` or an empty loader is reported and skipped.
        le: Fitted label encoder; ``le.classes_`` supplies the class names and
            the number of classes.
        criterion: Loss called as ``criterion(outputs, labels)``.

    Returns:
        None. The report and the per-batch average loss are printed.
    """
    # get_data_loader may hand back None when no validation samples remain.
    if dataloader is None or len(dataloader) == 0:
        print("No samples to evaluate; skipping classification report.")
        return

    # Run on the device the model lives on; fall back to the notebook-level
    # `device` only for models without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    # Pass the full label set explicitly: without `labels=`, classification_report
    # raises when the evaluated data (e.g. a small external test set) does not
    # contain every class named in `target_names`.
    class_ids = np.arange(len(le.classes_))
    report = classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=le.classes_,
        zero_division=0,
    )
    print(f"\nTest Set Classification Report (Avg Loss: {avg_loss:.4f}):\n")
    print(report)

def get_model_name(name, batch_size, learning_rate, epoch, base_dir="models"):
    """Return the checkpoint path for one training configuration.

    ``base_dir`` is created if it does not exist yet. The file name encodes
    the model name, batch size, learning rate and epoch.
    """
    template = "model_{0}_bs{1}_lr{2}_epoch{3}.pt"
    filename = template.format(name, batch_size, learning_rate, epoch)

    # Make sure the target directory exists before handing the path back.
    os.makedirs(base_dir, exist_ok=True)
    return os.path.join(base_dir, filename)

Meefo Stuff

In [12]:
#stuff for model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import math

# normalize data (penalizing weights so to speak)
from sklearn.utils.class_weight import compute_class_weight

# activation function
import numpy as np
from scipy.special import erf

#(dont use this for now; guarantee values w/ GELU first)
def activation(x):
  """Experimental activation: ``-x * erf(exp(-x))``, evaluated with NumPy/SciPy."""
  gate = erf(np.exp(-x))
  return -x * gate

# pooling
def pool(dim, kernel, stride = 1, padding = 0):
  """Output length along one dimension for a sliding window of size ``kernel``.

  Computes ``(dim + 2*padding - kernel) // stride + 1``.
  """
  padded_dim = dim + 2 * padding
  return (padded_dim - kernel) // stride + 1

# residual block (prevents vanishing gradients)
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages wrapped in a skip connection.

    The channel count and the spatial size are unchanged (kernel 3, padding 1),
    so the input can be added directly to the convolution branch.
    """

    def __init__(self, channels):
        super().__init__()

        def conv3x3():
            # Same-size 3x3 convolution that keeps the channel count.
            return nn.Conv2d(channels, channels, kernel_size=3, padding=1)

        # Order: conv -> BN -> GELU -> conv -> BN (the final GELU is applied after the skip add).
        layers = [
            conv3x3(),
            nn.BatchNorm2d(channels),
            nn.GELU(),
            conv3x3(),
            nn.BatchNorm2d(channels),
        ]
        self.conv = nn.Sequential(*layers)
        self.activation = nn.GELU()

    def forward(self, x):
        residual = self.conv(x)
        # skip connection: add the input back before the last non-linearity
        return self.activation(x + residual)
    
# inception block (test multiple kernel sizes)
class MiniInception(nn.Module):
    """Inception-style block: four parallel branches concatenated on the channel axis.

    Branches: 1x1 conv, 3x3 conv, 5x5 conv, and 3x3 max-pool followed by a
    1x1 conv. All branches preserve the spatial size, and the concatenated
    result goes through batch-norm and GELU.

    Args:
        in_channels: Number of input channels.
        out_channels: Total number of output channels (at least 4). The first
            three branches get ``out_channels // 4`` channels each and the
            pooling branch gets the remainder, so the concatenation always has
            exactly ``out_channels`` channels.

    Raises:
        ValueError: if ``out_channels`` is smaller than 4.
    """

    def __init__(self, in_channels, out_channels):
        super(MiniInception, self).__init__()
        if out_channels < 4:
            raise ValueError("out_channels must be at least 4 (one channel per branch)")

        branch_ch = out_channels // 4
        # The last branch absorbs the remainder. With four equal `// 4` branches,
        # an out_channels that is not a multiple of 4 (e.g. 87) produced fewer
        # channels than BatchNorm2d(out_channels) expects and forward() crashed.
        # For multiples of 4 this is identical to four equal branches.
        pool_ch = out_channels - 3 * branch_ch

        self.branch1 = nn.Conv2d(in_channels, branch_ch, kernel_size=1)
        self.branch3 = nn.Conv2d(in_channels, branch_ch, kernel_size=3, padding=1)
        self.branch5 = nn.Conv2d(in_channels, branch_ch, kernel_size=5, padding=2)
        self.branch_pool = nn.Conv2d(in_channels, pool_ch, kernel_size=1)

        self.activation = nn.GELU()
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        b1 = self.branch1(x)
        b3 = self.branch3(x)
        b5 = self.branch5(x)
        # stride 1 + padding 1 keeps the spatial size so the branch can be concatenated
        bp = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
        bp = self.branch_pool(bp)

        out = torch.cat([b1, b3, b5, bp], dim=1)
        return self.activation(self.bn(out))


# model
class ConvBNAct(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and GELU.

    Args:
        in_ch: Input channels.
        out_ch: Output channels.
        k: Kernel size, either an int (square kernel) or a ``(height, width)`` pair.
        s: Convolution stride.
        p: Padding; when ``None`` it defaults to half the kernel size per axis.
    """

    def __init__(self, in_ch, out_ch, k, s=1, p=None):
        super().__init__()
        kernel = (k, k) if isinstance(k, int) else k
        padding = p if p is not None else (kernel[0] // 2, kernel[1] // 2)

        self.conv = nn.Conv2d(
            in_ch,
            out_ch,
            kernel_size=kernel,
            stride=s,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_ch)
        self.act = nn.GELU()

    def forward(self, x):
        features = self.conv(x)
        normalized = self.bn(features)
        return self.act(normalized)

class GlizzyNetTiny(nn.Module):
    """
    3 conv blocks, large temporal kernels, optional MiniInception in block1.
    Input: (B, 1, Freq, Time) e.g., 128,128
    Output: (B, num_classes) scores from the final linear layer.

    Args:
        num_classes: Size of the output layer.
        dropout: Dropout2d probability of block 3; blocks 1 and 2 use
            0.5x and 0.75x of this value.
        base_ch: Channel width of block 1; blocks 2 and 3 use 2x and 3x.
        use_inception_first: Use a MiniInception stem in block 1 instead of
            the (5, 15) ConvBNAct.

    The sub-module names (block1/block2/block3/gap/fc) define the state_dict
    keys, so they must stay as they are for saved checkpoints to load.
    """
    def __init__(self, num_classes: int, dropout: float = 0.3, base_ch: int = 48, use_inception_first: bool = False):
        super().__init__()
        # Used by train_net (via get_model_name) to build checkpoint file names.
        self.name = "GlizzyNetTiny"

        # Block 1
        if use_inception_first:
            # NOTE(review): MiniInception.forward already ends with BatchNorm + GELU,
            # so the two layers below apply them a second time -- confirm this is intended.
            block1_feat = nn.Sequential(
                MiniInception(1, base_ch),
                nn.BatchNorm2d(base_ch),
                nn.GELU()
            )
        else:
            block1_feat = ConvBNAct(1, base_ch, k=(5,15))  # wider in time

        # Every block halves both spatial dimensions with a 2x2 max-pool.
        self.block1 = nn.Sequential(
            block1_feat,
            nn.MaxPool2d(2,2),
            nn.Dropout2d(dropout * 0.5),
        )

        # Block 2
        self.block2 = nn.Sequential(
            ConvBNAct(base_ch, base_ch * 2, k=(3,11)),
            nn.MaxPool2d(2,2),
            nn.Dropout2d(dropout * 0.75),
        )

        # Block 3
        self.block3 = nn.Sequential(
            ConvBNAct(base_ch * 2, base_ch * 3, k=(3,7)),
            nn.MaxPool2d(2,2),
            nn.Dropout2d(dropout),
        )

        # Head: global average pooling to 1x1, then a linear classifier
        self.gap = nn.AdaptiveAvgPool2d((1,1))
        self.fc  = nn.Linear(base_ch * 3, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Re-initialize every Conv2d and Linear layer of the network."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Kaiming-normal with the ReLU gain, although the activations are GELU.
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, a=math.sqrt(5))
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the three conv blocks, pool globally and classify."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.gap(x)
        # (B, C, 1, 1) -> (B, C) so it can feed the linear layer
        x = torch.flatten(x, 1)
        return self.fc(x)
    
def train_net(net, batch_size=64, learning_rate=0.01, num_epochs=30, target_classes=None, y_train=None):
    """Train ``net`` as a music-genre classifier and save checkpoints and logs.

    Data loaders come from ``get_data_loader`` and are published as the
    module-level globals ``train_loader``, ``val_loader``, ``test_loader`` and
    ``classes`` so later cells can reuse them. After every epoch the model
    state is saved to the path given by ``get_model_name``. After the last
    epoch the four metric curves are written as CSV files prefixed with the
    last epoch's checkpoint path, and a ``final`` checkpoint is saved.

    Args:
        net: Model to train; must expose ``net.name`` and already live on the
            notebook-level ``device``.
        batch_size: Samples per batch.
        learning_rate: Adam learning rate.
        num_epochs: Number of epochs to run (at least 1).
        target_classes: Required; forwarded to ``get_data_loader``.
        y_train: Currently unused. It was only read by a class-weighted loss
            variant that is disabled; kept for call compatibility.

    Raises:
        ValueError: if ``target_classes`` is None or ``num_epochs`` < 1.
        RuntimeError: if the training loader yields no samples.
    """
    ########################################################################
    # Train a classifier on music genres
    if target_classes is None:
        raise ValueError("target_classes must be specified for music genre classification")
    if num_epochs < 1:
        # Without at least one epoch there is no checkpoint path to prefix the logs with.
        raise ValueError("num_epochs must be at least 1")

    torch.manual_seed(1000)

    ########################################################################
    # Obtain the PyTorch data loader objects to load batches of the datasets
    global train_loader, val_loader, test_loader, classes
    train_loader, val_loader, test_loader, classes = get_data_loader(target_classes, batch_size)

    ########################################################################
    # Define the Loss function and optimizer
    # (A class-weighted loss built from y_train via compute_class_weight was
    # tried here before and is currently disabled.)
    criterion = nn.CrossEntropyLoss()
    # optimizer
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    ########################################################################
    # Set up arrays to store training/validation metrics
    train_err = np.zeros(num_epochs)
    train_loss = np.zeros(num_epochs)
    val_err = np.zeros(num_epochs)
    val_loss = np.zeros(num_epochs)

    ########################################################################
    # Train the network
    start_time = time.time()
    for epoch in range(num_epochs):
        net.train()
        total_train_loss = 0.0
        total_train_err = 0.0
        total_samples = 0
        num_batches = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)  # shape: [batch_size, num_classes]
            loss = criterion(outputs, labels)  # labels are class indices [0, ..., C-1]
            loss.backward()
            optimizer.step()

            # Compute number of incorrect predictions
            preds = torch.argmax(outputs, dim=1)
            total_train_err += (preds != labels).sum().item()
            total_train_loss += loss.item()
            total_samples += labels.size(0)
            num_batches += 1

        if total_samples == 0:
            raise RuntimeError("train_loader yielded no samples; cannot train")

        train_err[epoch] = total_train_err / total_samples
        train_loss[epoch] = total_train_loss / num_batches

        # Evaluate on validation set. get_data_loader returns None when every
        # validation sample was moved to the test set; log NaN in that case.
        if val_loader is None:
            val_err[epoch], val_loss[epoch] = np.nan, np.nan
        else:
            val_err[epoch], val_loss[epoch] = evaluate_multiclass(net, val_loader, criterion)

        print(f"Epoch {epoch+1}: "
              f"Train err: {train_err[epoch]:.4f}, Train loss: {train_loss[epoch]:.4f} | "
              f"Val err: {val_err[epoch]:.4f}, Val loss: {val_loss[epoch]:.4f}")

        # Save checkpoint
        model_path = get_model_name(net.name, batch_size, learning_rate, epoch)
        torch.save(net.state_dict(), model_path)

    print('Finished Training')
    print(f"Total time elapsed: {time.time() - start_time:.2f} seconds")

    # Save logs for plotting (prefixed with the last epoch's checkpoint path,
    # which is the prefix plot_training_curve expects)
    np.savetxt(f"{model_path}_train_err.csv", train_err)
    np.savetxt(f"{model_path}_train_loss.csv", train_loss)
    np.savetxt(f"{model_path}_val_err.csv", val_err)
    np.savetxt(f"{model_path}_val_loss.csv", val_loss)

    # Save final model
    final_model_path = get_model_name(net.name, batch_size, learning_rate, 'final')
    torch.save(net.state_dict(), final_model_path)
    print(f"Final model saved to {final_model_path}")

In [13]:
# Training Curve (borrowed from lab 2)
def plot_training_curve(path):
    """ Plots the training curve for a model run, given the csv files
    containing the train/validation error/loss.

    Args:
        path: Common prefix of the four log files; the function reads
            ``{path}_train_err.csv``, ``{path}_val_err.csv``,
            ``{path}_train_loss.csv`` and ``{path}_val_loss.csv``.
    """
    import matplotlib.pyplot as plt
    # ndmin=1: a run with a single epoch is stored as one number, which
    # np.loadtxt would otherwise return as a 0-d array that has no len().
    train_err = np.loadtxt("{}_train_err.csv".format(path), ndmin=1)
    val_err = np.loadtxt("{}_val_err.csv".format(path), ndmin=1)
    train_loss = np.loadtxt("{}_train_loss.csv".format(path), ndmin=1)
    val_loss = np.loadtxt("{}_val_loss.csv".format(path), ndmin=1)

    # Epochs are numbered from 1 on the x axis.
    n = len(train_err)
    epochs = range(1, n+1)

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.title("Train vs Validation Error")
    plt.plot(epochs, train_err, label="Train")
    plt.plot(epochs, val_err, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Error")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.title("Train vs Validation Loss")
    plt.plot(epochs, train_loss, label="Train")
    plt.plot(epochs, val_loss, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


Meefo Stuff

In [14]:
# CUDA sanity check. The device-specific queries raise on a machine without
# CUDA, so they only run when a GPU is actually available; the notebook
# otherwise falls back to the CPU device.
print(torch.cuda.is_available())  # Should be True
if torch.cuda.is_available():
    print(torch.cuda.current_device())  # Should return 0
    print(torch.cuda.get_device_name(0))  # Should return something like RTX 3060
    print(torch.cuda.memory_allocated())  # Should be >0 during training
    print(torch.cuda.memory_reserved())  # Also should be >0

True
0
NVIDIA GeForce RTX 3060
14452224
23068672


In [15]:
# All genre names known to the label encoder, in encoded order.
target_classes = le.classes_.tolist()


# Run MEEFO (disabled: the search below is kept as a string, not executed;
# the hyperparameters it produced are hard-coded in best_params)
'''
best_params = run_meefo_optimization(fitness_function, search_space, pop_size=10, max_iter=20)

print("\nBest Hyperparameters from MEEFO:")
print(best_params)
'''

# Final training run with best parameters

# 'hidden_size' is passed to the model as base_ch (block-1 channel width).
best_params = {
    'lr': 0.000868,
    'dropout': 0.442,
    'hidden_size': 87,
    'batch_size': 95
}


# Rebuild the architecture and restore the epoch-601 checkpoint instead of
# retraining. Weights are loaded onto the CPU first and moved to `device` below.
net = GlizzyNetTiny(num_classes=12, dropout=best_params['dropout'], base_ch=best_params['hidden_size'], use_inception_first=False)
checkpoint_path = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/models/model_GlizzyNetTiny_bs95_lr0.000868_epoch601.pt"
state_dict = torch.load(checkpoint_path, map_location='cpu')
net.load_state_dict(state_dict)

# Rebinds the notebook-level `device` that the evaluation/training helpers read.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = net.to(device)

# Training call is disabled (kept as a string); the restored checkpoint is used as-is.
'''train_net(
    net,
    batch_size=int(best_params['batch_size']),
    learning_rate=best_params['lr'],
    num_epochs=20,
    target_classes=target_classes,
    y_train=y_train
)'''



"train_net(\n    net,\n    batch_size=int(best_params['batch_size']),\n    learning_rate=best_params['lr'],\n    num_epochs=20,\n    target_classes=target_classes,\n    y_train=y_train\n)"

In [16]:
# Re-save the current weights of `net` under the default relative "models" directory.
# NOTE(review): the file is labelled epoch 16, but the preceding cell restored `net`
# from the epoch601 checkpoint -- confirm that this label is intended.
final_model_path = get_model_name(net.name, int(best_params['batch_size']), best_params['lr'], 16)
torch.save(net.state_dict(), final_model_path)
'''
net = GlizzyNet(num_classes=12, dropout=best_params['dropout'], use_inception_first=False)
net = net.to(device)

# Load the saved model weights
net.load_state_dict(torch.load(final_model_path))

# Continue training
train_net(
    net,
    batch_size=int(best_params['batch_size']),
    learning_rate=best_params['lr'],  # You can adjust the learning rate if needed
    num_epochs=300,  # Specify additional epochs #+600 (total 475+600;, so i will run for 1500) is safe
    target_classes=target_classes,
    y_train=y_train
)

# Save the model again after additional training
new_model_path = get_model_name(net.name, int(best_params['batch_size']), best_params['lr'], 574)  # Update epoch number
torch.save(net.state_dict(), new_model_path)
print(f"Model saved to {new_model_path}")'''


'\nnet = GlizzyNet(num_classes=12, dropout=best_params[\'dropout\'], use_inception_first=False)\nnet = net.to(device)\n\n# Load the saved model weights\nnet.load_state_dict(torch.load(final_model_path))\n\n# Continue training\ntrain_net(\n    net,\n    batch_size=int(best_params[\'batch_size\']),\n    learning_rate=best_params[\'lr\'],  # You can adjust the learning rate if needed\n    num_epochs=300,  # Specify additional epochs #+600 (total 475+600;, so i will run for 1500) is safe\n    target_classes=target_classes,\n    y_train=y_train\n)\n\n# Save the model again after additional training\nnew_model_path = get_model_name(net.name, int(best_params[\'batch_size\']), best_params[\'lr\'], 574)  # Update epoch number\ntorch.save(net.state_dict(), new_model_path)\nprint(f"Model saved to {new_model_path}")'

In [None]:
'''final_model_path = get_model_name(net.name, int(best_params['batch_size']), best_params['lr'], 16)
plot_training_curve(final_model_path)'''

FileNotFoundError: models\model_GlizzyNetTiny_bs95_lr0.000868_epoch16.pt_train_err.csv not found.

In [18]:
'''# Load your BEST trained model (epoch 601)
best_params = {
    'lr': 0.000868,
    'dropout': 0.442,
    'hidden_size': 87,
    'batch_size': 95
}

# Create model with correct architecture
net = GlizzyNetTiny(
    num_classes=12, 
    dropout=best_params['dropout'], 
    base_ch=best_params['hidden_size'], 
    use_inception_first=False
)

# Load your BEST checkpoint (epoch 601)
checkpoint_path = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/models/model_GlizzyNetTiny_bs95_lr0.000868_epoch16.pt"
state_dict = torch.load(checkpoint_path, map_location='cpu')
net.load_state_dict(state_dict)
net = net.to(device)

# Get data loaders - test_loader contains 35 samples per class FROM VALIDATION DATA
target_classes = le.classes_.tolist()
train_loader, val_loader, test_loader, classes = get_data_loader(
    target_classes, 
    batch_size=95
)

# Evaluate on TEST data (35 samples per class from validation set - NEVER seen during training)
criterion = nn.CrossEntropyLoss()
print("=== Performance on Test Data (35 samples per class from validation set) ===")
evaluate_per_class(net, test_loader, le, criterion)

# Optional: Also evaluate on remaining validation data (if any exists)
if val_loader is not None and len(val_loader.dataset) > 0:
    print(f"\n=== Performance on Remaining Validation Data ({len(val_loader.dataset)} samples) ===")
    evaluate_per_class(net, val_loader, le, criterion)
else:
    print("\n=== No remaining validation data (all used for test set) ===")'''


'# Load your BEST trained model (epoch 601)\nbest_params = {\n    \'lr\': 0.000868,\n    \'dropout\': 0.442,\n    \'hidden_size\': 87,\n    \'batch_size\': 95\n}\n\n# Create model with correct architecture\nnet = GlizzyNetTiny(\n    num_classes=12, \n    dropout=best_params[\'dropout\'], \n    base_ch=best_params[\'hidden_size\'], \n    use_inception_first=False\n)\n\n# Load your BEST checkpoint (epoch 601)\ncheckpoint_path = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/models/model_GlizzyNetTiny_bs95_lr0.000868_epoch16.pt"\nstate_dict = torch.load(checkpoint_path, map_location=\'cpu\')\nnet.load_state_dict(state_dict)\nnet = net.to(device)\n\n# Get data loaders - test_loader contains 35 samples per class FROM VALIDATION DATA\ntarget_classes = le.classes_.tolist()\ntrain_loader, val_loader, test_loader, classes = get_data_loader(\n    target_classes, \n    batch_size=95\n)\n\n# Evaluate on TEST data (35 samples per class from validation set - NEVER seen du

In [None]:
'''# work on test set
def load_test_data(test_dir, le, batch_size=64):
    """
    Load test data using the existing LabelEncoder
    
    Args:
        test_dir: Path to directory containing test data
        le: Existing LabelEncoder instance
        batch_size: samples per batch
    
    Returns:
        test_loader: DataLoader for test set
    """
    # Load test data
    X_test, y_test = load_dataset(test_dir)

    global normalization_params
    X_test = (X_test - normalization_params['mean']) / normalization_params['std']
    
    # Encode labels using existing label encoder
    y_test_encoded = le.transform(y_test)  # Use transform() not fit_transform()
    
    # Convert to PyTorch tensors
    X_test_tensor = torch.FloatTensor(X_test)
    y_test_tensor = torch.LongTensor(y_test_encoded)
    
    # Reshape to 128x128 images
    X_test_tensor = X_test_tensor.view(-1, 1, 128, 128)

    # Create TensorDataset and DataLoader
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    print(f"Loaded {len(test_dataset)} test samples")
    print(f"Classes in test set: {le.classes_.tolist()}")
    
    return test_loader

#plot_training_curve(final_model_path)

#test_dir = "D:\\Documents\\A-APS360\\Music_Genre_Classification\\Documents\\APS360\\Project\\test-set-random\\test set random\\test_updated"
#test_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/test-set-random-small/test set random/test_updated"
#test_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/test-fma/Multiple files/test_fma"
#test_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/test-fma-small/Multiple files/test_fma"

# Use weighted loss
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_tensor = torch.FloatTensor(class_weights).to(device)
criterion = nn.CrossEntropyLoss()

# Load the saved model weights
#net.load_state_dict(torch.load(final_model_path))

# Move the model to the correct device
print(f"Using trained model: {net.name}")
net.eval()
net = net.to(device)

print(f"Loaded the best model from {final_model_path}")
evaluate_per_class(net, test_loader, le, criterion)'''

SyntaxError: invalid syntax (1623479374.py, line 47)

In [None]:
'''# Load your BEST trained model (epoch 601)
best_params = {
    'lr': 0.000868,
    'dropout': 0.442,
    'hidden_size': 87,
    'batch_size': 95
}

# Create model with correct architecture
net = GlizzyNetTiny(
    num_classes=12, 
    dropout=best_params['dropout'], 
    base_ch=best_params['hidden_size'], 
    use_inception_first=False
)

# Load your BEST checkpoint (epoch 601) - FIX: Use epoch 601, not 16!
checkpoint_path = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/models/model_GlizzyNetTiny_bs95_lr0.000868_epoch601.pt"
state_dict = torch.load(checkpoint_path, map_location='cpu')
net.load_state_dict(state_dict)
net = net.to(device)

# Get data loaders
target_classes = le.classes_.tolist()
train_loader, val_loader, test_loader, classes = get_data_loader(
    target_classes, 
    batch_size=95  # Use same batch size as training
)

# Create a limited validation loader with 35 samples per class
from torch.utils.data import Subset
import numpy as np

# Get labels from validation dataset
val_labels = np.array([val_loader.dataset[i][1] for i in range(len(val_loader.dataset))])

# Find indices for first 35 samples of each class
val_indices = []
for class_idx in range(12):  # 12 classes
    class_indices = np.where(val_labels == class_idx)[0]
    # Take first 35 samples from this class (or all if less than 35)
    selected_indices = class_indices[:35]
    val_indices.extend(selected_indices.tolist())

print(f"Selected {len(val_indices)} samples total (35 per class)")

limited_val_dataset = Subset(val_loader.dataset, val_indices)
limited_val_loader = DataLoader(limited_val_dataset, batch_size=95, shuffle=False)

# Evaluate on LIMITED VALIDATION data (35 samples per class)
criterion = nn.CrossEntropyLoss()
print(f"=== Performance on Validation Data (Limited to 35 samples per class, {len(limited_val_dataset)} total) ===")
evaluate_per_class(net, limited_val_loader, le, criterion)

# Test on external test data
test_dir = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/test-fma-small/Multiple files/test_fma"
external_test_loader = load_test_data(test_dir, le, 95)

print("\n=== Performance on External Test Data ===")
evaluate_per_class(net, external_test_loader, le, criterion)'''

Loaded 6000 training, 1206 validation, 6000 test
Classes: ['Classical', 'Electronic', 'Experimental', 'Folk', 'Hip-Hop', 'Instrumental', 'International', 'Jazz', 'Old-Time', 'Pop', 'Rock', 'Spoken']
Selected 420 samples total (35 per class)
=== Performance on Validation Data (Limited to 35 samples per class, 420 total) ===

Test Set Classification Report (Avg Loss: 0.7520):

               precision    recall  f1-score   support

    Classical       0.85      0.94      0.89        35
   Electronic       0.86      0.89      0.87        35
 Experimental       0.91      0.89      0.90        35
         Folk       0.85      0.94      0.89        35
      Hip-Hop       0.79      0.89      0.84        35
 Instrumental       0.93      0.71      0.81        35
International       0.86      0.89      0.87        35
         Jazz       0.86      0.89      0.87        35
     Old-Time       0.82      0.66      0.73        35
          Pop       0.75      0.86      0.80        35
         Rock   

In [69]:
import torch
import librosa
import numpy as np
import torch.nn.functional as F
import pygame
import time

def play_audio_pygame(file_path):
    """Play an audio file through pygame's mixer and block until it finishes.

    Args:
        file_path: Path of the audio file handed to ``pygame.mixer.music.load``.

    The mixer is always shut down again, even if loading or playback fails or
    the wait loop is interrupted, so the audio device is not left open.
    """
    pygame.mixer.init()
    try:
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.set_volume(0.1) # that second clip is too loud @ 100%
        pygame.mixer.music.play()

        # Wait for playback to finish
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
    finally:
        pygame.mixer.quit()

# Play your 5-second MP3
# Two 5-second MP3 clips used for the demo below; playback is disabled by default.
audio_file1 = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/Giant Steps-5s.mp3"
audio_file2 = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/Numb (Official Music Video) [4K UPGRADE]  Linkin Park-5s.mp3"
#play_audio_pygame(audio_file1)
#play_audio_pygame(audio_file2)

# Pre-processing settings read by the audio helper functions in this cell.
SR = 16000  # sample rate requested from librosa.load
N_MELS = 128  # number of mel bands of the spectrogram
TARGET_SHAPE = (128, 128)  # spectrograms are padded/cropped to this shape
MIN_AUDIO_LENGTH = 1.0  # minimum clip length in seconds (compared against len(y) / SR)
MIN_VOLUME = 0.01  # minimum peak absolute amplitude for a clip to count as valid

def is_valid_audio(y):
    """Return whether ``y`` is long enough and loud enough to be processed.

    A clip is valid when it has at least ``SR * MIN_AUDIO_LENGTH`` samples and
    its peak absolute amplitude reaches ``MIN_VOLUME``.
    """
    # Too short: reject before looking at the amplitude.
    if len(y) < SR * MIN_AUDIO_LENGTH:
        return False
    peak = np.max(np.abs(y))
    return peak >= MIN_VOLUME

def process_audio_file(mp3_path):
    """Convert an audio file into a normalised log-mel spectrogram tensor.

    The file is decoded with librosa at ``SR`` Hz in mono, checked with
    ``is_valid_audio``, turned into an ``N_MELS``-band mel spectrogram,
    converted to dB, min-max scaled per clip to [-1, 1], and finally
    padded/cropped to ``TARGET_SHAPE`` by ``resize_spectrogram``.

    Args:
        mp3_path: Path of the audio file to load.

    Returns:
        A ``torch.float32`` tensor with a leading singleton dimension added by
        ``unsqueeze(0)`` (``(1, 128, 128)`` with the current ``TARGET_SHAPE``),
        or ``None`` when the file cannot be decoded, the audio fails
        ``is_valid_audio``, or any processing step raises.
    """
    try:
        # Decoding is inside the try block so that a missing or corrupt file
        # is reported and turned into None like every other failure.
        y, sr = librosa.load(mp3_path, sr=SR, mono=True)

        # librosa.load never returns None, so validate the signal itself:
        # reject clips that are too short or effectively silent.
        if not is_valid_audio(y):
            print(f"Skipping {mp3_path}: audio is too short or too quiet")
            return None

        mel = librosa.feature.melspectrogram(
            y=y, sr=sr,
            n_mels=N_MELS,
            n_fft=2048,
            hop_length=512,
            fmin=20,
            fmax=8000
        )

        log_mel = librosa.power_to_db(mel, ref=1.0)
        # Per-clip min-max scaling to [-1, 1]; the epsilon avoids division by
        # zero when the spectrogram is constant.
        log_mel = (log_mel - log_mel.min()) / (log_mel.max() - log_mel.min() + 1e-10) * 2 - 1

        # convert to tensor
        tensor = torch.tensor(log_mel, dtype=torch.float32)
        tensor = resize_spectrogram(tensor, TARGET_SHAPE)
        return tensor.unsqueeze(0)

    except Exception as e:
        print(f"Error processing {mp3_path}: {str(e)}")
        return None

def resize_spectrogram(spec, target_shape):
    """Force a 2-D spectrogram to ``target_shape`` by padding or cropping.

    Each axis is handled independently: if it is shorter than the target it is
    padded at the end using ``F.pad``'s default fill value, otherwise it is
    cropped from the end.

    Args:
        spec: Tensor indexed as ``[frequency, time]``.
        target_shape: ``(height, width)`` wanted for the result.

    Returns:
        Tensor of shape ``(target_shape[0], target_shape[1])``.
    """
    target_rows, target_cols = target_shape[0], target_shape[1]

    # Width (time axis) first.
    missing_cols = target_cols - spec.shape[1]
    spec = F.pad(spec, (0, missing_cols)) if missing_cols > 0 else spec[:, :target_cols]

    # Then height (frequency axis).
    missing_rows = target_rows - spec.shape[0]
    spec = F.pad(spec, (0, 0, 0, missing_rows)) if missing_rows > 0 else spec[:target_rows, :]

    return spec

# Load two saved tensors from the validation split (one from the Jazz folder,
# one from the Rock folder); the following cell feeds them to the model as
# single-sample inference checks.
# NOTE(review): these files live under train_set/processed_spectrograms_v2,
# whereas normalization_params at the top of the notebook is computed from the
# augmented set — confirm the same mean/std are appropriate here.
tensor = torch.load("D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/train_set/processed_spectrograms_v2/val/Jazz/5098.pt")
tensor2 = torch.load("D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/train_set/processed_spectrograms_v2/val/Rock/1100.pt")

In [70]:
# initialize CNN
# Hyperparameters of the saved run. Only 'dropout' and 'hidden_size' are used
# in this cell (to rebuild the architecture); 'lr' and 'batch_size' match the
# values embedded in the checkpoint filename.
best_params = {
    'lr': 0.000868,
    'dropout': 0.442,
    'hidden_size': 87,
    'batch_size': 95
}


def report_prediction(logits, class_names, top_k=3):
    """Print the predicted class, its confidence and the top-k classes.

    Args:
        logits: Model output for a single sample, shape ``(1, num_classes)``.
        class_names: Indexable mapping from class index to label
            (e.g. ``le.classes_``).
        top_k: How many of the most probable classes to list.

    Returns:
        Tuple ``(predicted_index, predicted_label, confidence, probabilities)``
        where ``probabilities`` is the softmax over classes for the sample.
    """
    # Softmax is computed once and reused for the confidence and the ranking.
    probabilities = torch.softmax(logits, dim=1)[0]
    predicted_idx = torch.argmax(logits, dim=1).item()
    predicted_label = class_names[predicted_idx]
    confidence = probabilities[predicted_idx].item()

    print(f"Output shape: {logits.shape}")
    print(f"Predicted genre: {predicted_label}")
    print(f"Confidence: {confidence:.4f}")

    top = torch.topk(probabilities, top_k)
    print(f"\nTop {top_k} predictions:")
    # .tolist() yields plain Python ints/floats, so class_names is never
    # indexed with a (possibly CUDA) tensor.
    for rank, (idx, prob) in enumerate(zip(top.indices.tolist(), top.values.tolist()), start=1):
        print(f"{rank}. {class_names[idx]}: {prob:.4f}")

    return predicted_idx, predicted_label, confidence, probabilities


model = GlizzyNetTiny(
    num_classes=12,
    dropout=best_params['dropout'],
    base_ch=best_params['hidden_size'],
    use_inception_first=False
)

# load the saved model weights (onto CPU first, then move the whole model)
checkpoint_path = "D:/Documents/A-APS360/Music_Genre_Classification/Documents/APS360/Project/models/model_GlizzyNetTiny_bs95_lr0.000868_epoch601.pt"
state_dict = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(state_dict)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# apply normalization (same as training data)
# The normalised inputs get their own names: overwriting `tensor`/`tensor2`
# would normalise them a second time whenever this cell is re-run.
norm = globals().get('normalization_params')
if norm is None:
    # Keep the best-effort behaviour, but make the missing statistics visible.
    print("Warning: normalization_params is not defined; running on un-normalized inputs")
    input1, input2 = tensor, tensor2
else:
    input1 = (tensor - norm['mean']) / norm['std']
    input2 = (tensor2 - norm['mean']) / norm['std']

with torch.no_grad():
    # unsqueeze(0) adds the batch dimension the model expects.
    output1 = model(input1.to(device).unsqueeze(0))
    output2 = model(input2.to(device).unsqueeze(0))

# get first prediction
predicted_class_idx1, predicted_genre1, confidence1, probabilities1 = report_prediction(output1, le.classes_)

print("\n")
# get second prediction
predicted_class_idx2, predicted_genre2, confidence2, probabilities2 = report_prediction(output2, le.classes_)

Output shape: torch.Size([1, 12])
Predicted genre: Jazz
Confidence: 0.9046

Top 3 predictions:
1. Jazz: 0.9046
2. Rock: 0.0252
3. Experimental: 0.0237


Output shape: torch.Size([1, 12])
Predicted genre: Rock
Confidence: 0.9031

Top 3 predictions:
1. Rock: 0.9031
2. Jazz: 0.0255
3. Pop: 0.0209
