In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from torchvision import datasets
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, LabelEncoder


## Processing Dataset

In [2]:
df = pd.read_csv('GTZAN_Dataset/features_3_sec.csv')
print(df.shape)
# Check for missing values
print(f"Missing values in dataset: {df.isnull().sum().sum()}")

# Analyze class distribution
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='label')
plt.title('Class Distribution')
plt.xticks(rotation=45)
plt.savefig('class_distribution.png')
plt.close()

# Scale features
scalar = MinMaxScaler()
df_processed = df.drop(columns=['filename', 'label'])
df_processed_normalized = pd.DataFrame(scalar.fit_transform(df_processed), columns=df_processed.columns)

# Analyze feature correlations to identify redundancy
plt.figure(figsize=(15, 12))
correlation = df_processed.corr()
sns.heatmap(correlation, annot=False, cmap='coolwarm')
plt.title('Feature Correlation Matrix')
plt.savefig('feature_correlation.png')
plt.close()

# Add back labels
df_processed_normalized = pd.concat([df_processed_normalized, df['label']], axis=1)

# Calculate averages per genre
averages_df = df_processed_normalized.groupby('label').mean()

# Find features with highest variance between classes (most discriminative)
std_dev_of_means = averages_df.std()
sorted_by_std = std_dev_of_means.sort_values(ascending=False)
print("Top 10 discriminative features:")
print(sorted_by_std.head(10))

# Select top features (more discriminative)
top_k_features = 60  # Can be tuned based on analysis
top_features = sorted_by_std.head(top_k_features).index.tolist()

# Split data
X = df_processed_normalized.drop(columns='label')
y = df['label']
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)


(9990, 60)
Missing values in dataset: 0
Top 10 discriminative features:
rolloff_mean               0.133172
spectral_bandwidth_mean    0.124018
chroma_stft_mean           0.104967
spectral_centroid_mean     0.104227
mfcc1_mean                 0.102413
rms_mean                   0.099443
mfcc4_mean                 0.091735
mfcc2_mean                 0.087981
mfcc6_mean                 0.083583
mfcc7_mean                 0.081289
dtype: float64


In [3]:
pca = PCA(n_components=0.95)  # Keep components that explain 95% of variance
pca.fit(X_train)

# Plot explained variance to visualize information preservation
plt.figure(figsize=(10, 6))
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Explained Variance vs. Number of PCA Components')
plt.axhline(y=0.95, color='r', linestyle='--')
plt.grid(True)
plt.savefig('pca_variance.png')
plt.close()

print(f"Number of components needed for 95% variance: {pca.n_components_}")

# Transform data using PCA
X_train_transformed = (X_train)
X_val_transformed = (X_val)
X_test_transformed = (X_test)

print(f"Reduced dimensions from {X_train.shape[1]} to {X_train_transformed.shape[1]}")
print(f"Transformed shapes - Train: {X_train_transformed.shape}, Test: {X_test_transformed.shape}, Val: {X_val_transformed.shape}")
print(f"Labels distribution: {y_train.value_counts()}")

Number of components needed for 95% variance: 33
Reduced dimensions from 58 to 58
Transformed shapes - Train: (6993, 58), Test: (1498, 58), Val: (1499, 58)
Labels distribution: label
pop          733
jazz         714
country      711
disco        698
rock         698
metal        697
classical    690
hiphop       687
reggae       684
blues        681
Name: count, dtype: int64


## Training Model

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Encode labels
encoder = LabelEncoder()
encoder.fit(y)
y_train_encoded = encoder.transform(y_train)
y_test_encoded = encoder.transform(y_test)
y_val_encoded = encoder.transform(y_val)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_transformed.to_numpy(), dtype=torch.float32)
Y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)

X_val_tensor = torch.tensor(X_val_transformed.to_numpy(), dtype=torch.float32)
Y_val_tensor = torch.tensor(y_val_encoded, dtype=torch.long)

X_test_tensor = torch.tensor(X_test_transformed.to_numpy(), dtype=torch.float32)
Y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

# Create datasets
trainset = TensorDataset(X_train_tensor, Y_train_tensor)
valset = TensorDataset(X_val_tensor, Y_val_tensor)
testset = TensorDataset(X_test_tensor, Y_test_tensor)

# Check dataset dimensions
song_stats, label = trainset[8]
print(f"Example feature vector shape: {song_stats.shape}")
print(f"Example label: {label} ({encoder.inverse_transform([label])[0]})")

print(f'Train set size: {len(trainset)}, Validation set size: {len(valset)}, Test set size: {len(testset)}')


Example feature vector shape: torch.Size([58])
Example label: 7 (pop)
Train set size: 6993, Validation set size: 1499, Test set size: 1498


In [5]:
batchsize = 32  # Increased from 16 for faster training

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=batchsize, shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False)


In [6]:
n_pixels = X_train_transformed.shape[1]  # Number of PCA components
n_classes = 10  # 10 music genres in GTZAN

class ImprovedMLP(nn.Module):
    def __init__(self, n_pixels, n_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(n_pixels, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, n_classes)  
        )
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        return self.model(x)

model = ImprovedMLP(n_pixels, n_classes)
print(model)

ImprovedMLP(
  (model): Sequential(
    (0): Linear(in_features=58, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.4, inplace=False)
    (12): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3, verbose=True
)



In [8]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif hasattr(torch, 'backends') and hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device("mps")  # MPS acceleration is available on MacOS 12.3+
else:
    device = torch.device("cpu")

print(f'Using device: {device}')
model.to(device)

Using device: mps


ImprovedMLP(
  (model): Sequential(
    (0): Linear(in_features=58, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.4, inplace=False)
    (12): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [9]:
def train(model, trainloader, criterion, optimizer, device):
    model.train()
    running_loss = 0
    running_acc = 0
    with tqdm(total=len(trainloader), desc=f"Train", unit="batch") as pbar:
        for n_batch, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_acc += (output.argmax(1) == labels).float().mean().item()
            pbar.set_postfix({'loss': loss.item(), 'acc': 100. * running_acc / (n_batch+1)})
            pbar.update()
    return running_loss / len(trainloader), running_acc / len(trainloader)


In [10]:
def validate(model, valloader, criterion, device):
    model.eval()
    running_loss = 0
    running_acc = 0
    with torch.no_grad():
        with tqdm(total=len(valloader), desc=f"Eval", unit="batch") as pbar:
            for n_batch, (images, labels) in enumerate(valloader):
                images, labels = images.to(device), labels.to(device)
                output = model(images)
                loss = criterion(output, labels)
                running_loss += loss.item()
                running_acc += (output.argmax(1) == labels).float().mean().item()
                pbar.set_postfix({'loss': loss.item(), 'acc': 100. * running_acc / (n_batch+1)})
                pbar.update()
    return running_loss / len(valloader), running_acc / len(valloader)


In [11]:
def evaluate_model(model, dataloader, device, encoder):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate and plot confusion matrix
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(all_labels, all_preds)
    
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=encoder.classes_, 
                yticklabels=encoder.classes_)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.savefig('confusion_matrix.png')
    plt.close()
    
    # Calculate per-class metrics
    precision = np.diag(cm) / np.sum(cm, axis=0)
    recall = np.diag(cm) / np.sum(cm, axis=1)
    f1 = 2 * (precision * recall) / (precision + recall)
    
    for i, class_name in enumerate(encoder.classes_):
        print(f"{class_name}: Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
    
    return np.mean(f1)

In [12]:
# Run training and validation loop
# Save the best model based on validation accuracy
n_epochs = 50
best_acc = -1
train_loss_history = []; train_acc_history = []
val_loss_history = []; val_acc_history = []
for epoch in range(n_epochs): # Iterate over epochs
    print(f"Epoch {epoch+1} of {n_epochs}")
    train_loss, train_acc  = train(model, trainloader, criterion, optimizer, device) # Train
    val_loss, val_acc = validate(model, valloader, criterion, device) # Validate
    train_loss_history.append(train_loss); train_acc_history.append(train_acc)
    val_loss_history.append(val_loss); val_acc_history.append(val_acc)
    if val_acc > best_acc: # Save best model
        best_acc = val_acc
        torch.save(model.state_dict(), "best_model.pt") # saving model parameters ("state_dict") saves memory and is faster than saving the entire model

Epoch 1 of 50


Train: 100%|██████████| 219/219 [00:03<00:00, 70.57batch/s, loss=1.7, acc=47.3]   
Eval: 100%|██████████| 47/47 [00:00<00:00, 213.28batch/s, loss=0.991, acc=64.3]


Epoch 2 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 183.55batch/s, loss=0.768, acc=61.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 496.32batch/s, loss=0.835, acc=72.6]


Epoch 3 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.36batch/s, loss=1.26, acc=67.2] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 502.43batch/s, loss=0.715, acc=74.4]


Epoch 4 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 193.91batch/s, loss=1.59, acc=70.2] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 502.81batch/s, loss=0.811, acc=76.9]


Epoch 5 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 184.24batch/s, loss=0.826, acc=70.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 522.10batch/s, loss=0.685, acc=78.9]


Epoch 6 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 188.12batch/s, loss=0.861, acc=72.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 508.91batch/s, loss=0.723, acc=79.8]


Epoch 7 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 192.15batch/s, loss=1.09, acc=74.5] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 505.35batch/s, loss=0.763, acc=80.4]


Epoch 8 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.47batch/s, loss=1.48, acc=75.4] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 506.05batch/s, loss=0.641, acc=79.8]


Epoch 9 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 189.27batch/s, loss=0.266, acc=75.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 510.01batch/s, loss=0.674, acc=80.9]


Epoch 10 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 192.60batch/s, loss=0.732, acc=77.5]
Eval: 100%|██████████| 47/47 [00:00<00:00, 434.00batch/s, loss=0.702, acc=82.5]


Epoch 11 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 181.77batch/s, loss=0.434, acc=78.3]
Eval: 100%|██████████| 47/47 [00:00<00:00, 457.58batch/s, loss=0.748, acc=83.6]


Epoch 12 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 180.10batch/s, loss=0.638, acc=78.5]
Eval: 100%|██████████| 47/47 [00:00<00:00, 506.04batch/s, loss=0.862, acc=84.2]


Epoch 13 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 189.72batch/s, loss=0.531, acc=79.4]
Eval: 100%|██████████| 47/47 [00:00<00:00, 505.15batch/s, loss=0.635, acc=84.3]


Epoch 14 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 194.62batch/s, loss=0.934, acc=79.4]
Eval: 100%|██████████| 47/47 [00:00<00:00, 513.67batch/s, loss=0.672, acc=84.4]


Epoch 15 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 196.98batch/s, loss=0.551, acc=80.4]
Eval: 100%|██████████| 47/47 [00:00<00:00, 442.77batch/s, loss=0.632, acc=86.3]


Epoch 16 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 137.52batch/s, loss=0.459, acc=81.2]
Eval: 100%|██████████| 47/47 [00:00<00:00, 415.55batch/s, loss=0.674, acc=83.8]


Epoch 17 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 182.93batch/s, loss=0.313, acc=80.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 529.41batch/s, loss=0.726, acc=85.1]


Epoch 18 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 190.25batch/s, loss=0.458, acc=82.1]
Eval: 100%|██████████| 47/47 [00:00<00:00, 515.10batch/s, loss=0.742, acc=85.6]


Epoch 19 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 175.03batch/s, loss=0.516, acc=82.6]
Eval: 100%|██████████| 47/47 [00:00<00:00, 499.20batch/s, loss=0.685, acc=85.7]


Epoch 20 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 196.36batch/s, loss=0.507, acc=82.3]
Eval: 100%|██████████| 47/47 [00:00<00:00, 445.25batch/s, loss=0.661, acc=87]  


Epoch 21 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 196.22batch/s, loss=0.328, acc=83.9]
Eval: 100%|██████████| 47/47 [00:00<00:00, 515.55batch/s, loss=0.503, acc=87.7]


Epoch 22 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 189.18batch/s, loss=0.518, acc=82.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 506.82batch/s, loss=0.65, acc=87.3] 


Epoch 23 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 190.43batch/s, loss=0.61, acc=84]   
Eval: 100%|██████████| 47/47 [00:00<00:00, 511.16batch/s, loss=0.652, acc=86.4]


Epoch 24 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 187.11batch/s, loss=0.542, acc=83.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 501.93batch/s, loss=0.642, acc=89.3]


Epoch 25 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 172.19batch/s, loss=0.782, acc=84.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 514.55batch/s, loss=0.526, acc=88.8] 


Epoch 26 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 178.53batch/s, loss=0.402, acc=84.9]
Eval: 100%|██████████| 47/47 [00:00<00:00, 500.12batch/s, loss=0.721, acc=86.9]


Epoch 27 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 174.30batch/s, loss=0.312, acc=84.4]
Eval: 100%|██████████| 47/47 [00:00<00:00, 490.24batch/s, loss=0.532, acc=89]  


Epoch 28 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 170.99batch/s, loss=0.693, acc=85.5]
Eval: 100%|██████████| 47/47 [00:00<00:00, 494.68batch/s, loss=0.364, acc=88.7] 


Epoch 29 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 182.61batch/s, loss=0.371, acc=85.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 517.51batch/s, loss=0.476, acc=88.1]


Epoch 30 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 194.29batch/s, loss=0.411, acc=85.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 518.53batch/s, loss=0.544, acc=89.8]


Epoch 31 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 193.53batch/s, loss=0.119, acc=86.1]
Eval: 100%|██████████| 47/47 [00:00<00:00, 517.58batch/s, loss=0.412, acc=88.1]


Epoch 32 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 180.26batch/s, loss=0.187, acc=85.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 460.20batch/s, loss=0.655, acc=88.4] 


Epoch 33 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 177.21batch/s, loss=0.278, acc=86.7]
Eval: 100%|██████████| 47/47 [00:00<00:00, 484.74batch/s, loss=0.521, acc=89.1]


Epoch 34 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 192.31batch/s, loss=0.935, acc=86.6]
Eval: 100%|██████████| 47/47 [00:00<00:00, 511.89batch/s, loss=0.597, acc=89.4]


Epoch 35 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 197.79batch/s, loss=0.442, acc=87.6] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 511.04batch/s, loss=0.473, acc=89.5] 


Epoch 36 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 191.30batch/s, loss=0.461, acc=87.5]
Eval: 100%|██████████| 47/47 [00:00<00:00, 511.00batch/s, loss=0.483, acc=88.8]


Epoch 37 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.53batch/s, loss=0.876, acc=86.8]
Eval: 100%|██████████| 47/47 [00:00<00:00, 506.49batch/s, loss=0.448, acc=86.7]


Epoch 38 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 186.93batch/s, loss=0.258, acc=87.9]
Eval: 100%|██████████| 47/47 [00:00<00:00, 492.48batch/s, loss=0.541, acc=89.1] 


Epoch 39 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 190.35batch/s, loss=0.574, acc=88]   
Eval: 100%|██████████| 47/47 [00:00<00:00, 483.67batch/s, loss=0.43, acc=89.7]  


Epoch 40 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 196.08batch/s, loss=0.658, acc=87.7] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 520.63batch/s, loss=0.421, acc=90.1] 


Epoch 41 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.29batch/s, loss=1.24, acc=88]    
Eval: 100%|██████████| 47/47 [00:00<00:00, 516.95batch/s, loss=0.393, acc=91]   


Epoch 42 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 197.43batch/s, loss=0.153, acc=88]  
Eval: 100%|██████████| 47/47 [00:00<00:00, 510.52batch/s, loss=0.592, acc=89.5]


Epoch 43 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 194.90batch/s, loss=1.22, acc=87.9]  
Eval: 100%|██████████| 47/47 [00:00<00:00, 510.61batch/s, loss=0.45, acc=88.8] 


Epoch 44 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 198.50batch/s, loss=0.544, acc=88]  
Eval: 100%|██████████| 47/47 [00:00<00:00, 521.01batch/s, loss=0.452, acc=91.3]


Epoch 45 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.81batch/s, loss=0.58, acc=88]   
Eval: 100%|██████████| 47/47 [00:00<00:00, 513.66batch/s, loss=0.446, acc=89.7] 


Epoch 46 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 199.51batch/s, loss=0.557, acc=88.6] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 526.85batch/s, loss=0.372, acc=90.7] 


Epoch 47 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 195.90batch/s, loss=0.52, acc=87.9]  
Eval: 100%|██████████| 47/47 [00:00<00:00, 513.63batch/s, loss=0.551, acc=90.6] 


Epoch 48 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 199.89batch/s, loss=0.514, acc=88.1]
Eval: 100%|██████████| 47/47 [00:00<00:00, 512.37batch/s, loss=0.491, acc=91.4] 


Epoch 49 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 196.26batch/s, loss=0.297, acc=88.6] 
Eval: 100%|██████████| 47/47 [00:00<00:00, 509.40batch/s, loss=0.582, acc=90.8] 


Epoch 50 of 50


Train: 100%|██████████| 219/219 [00:01<00:00, 199.57batch/s, loss=0.26, acc=89]    
Eval: 100%|██████████| 47/47 [00:00<00:00, 512.20batch/s, loss=0.67, acc=89.9]  


In [13]:
model.load_state_dict(torch.load("best_model.pt"))
test_loss, test_acc = validate(model, testloader, criterion, device)
print(f"Test accuracy: {test_acc:.4f}")

# Generate confusion matrix and per-class metrics
print("Detailed evaluation on test set:")
f1_score = evaluate_model(model, testloader, device, encoder)
print(f"Overall F1 score: {f1_score:.4f}")

Eval: 100%|██████████| 47/47 [00:00<00:00, 262.48batch/s, loss=0.224, acc=89.2] 


Test accuracy: 0.8925
Detailed evaluation on test set:
blues: Precision: 0.9050, Recall: 0.9419, F1: 0.9231
classical: Precision: 0.9012, Recall: 0.9733, F1: 0.9359
country: Precision: 0.8231, Recall: 0.8643, F1: 0.8432
disco: Precision: 0.8289, Recall: 0.8456, F1: 0.8372
hiphop: Precision: 0.9388, Recall: 0.8903, F1: 0.9139
jazz: Precision: 0.9091, Recall: 0.8917, F1: 0.9003
metal: Precision: 0.9424, Recall: 0.8912, F1: 0.9161
pop: Precision: 0.9051, Recall: 0.9394, F1: 0.9219
reggae: Precision: 0.9085, Recall: 0.9085, F1: 0.9085
rock: Precision: 0.8594, Recall: 0.7692, F1: 0.8118
Overall F1 score: 0.8912
