In [1]:
import torch
import numpy as np
import os
import torch.nn as nn
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split, Subset
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

print("imports successful!")

imports successful!


In [2]:
x_train = torch.load("/scratch/gd2574/AudioSet-classification/Data/train/train_rep.pt")
y_train = torch.load("/scratch/gd2574/AudioSet-classification/Data/train/labels.pt")
x_test = torch.load("/scratch/gd2574/AudioSet-classification/Data/test/test_rep.pt")
y_test = torch.load("/scratch/gd2574/AudioSet-classification/Data/test/labels.pt")

  x_train = torch.load("/scratch/gd2574/AudioSet-classification/Data/train/train_rep.pt")
  y_train = torch.load("/scratch/gd2574/AudioSet-classification/Data/train/labels.pt")
  x_test = torch.load("/scratch/gd2574/AudioSet-classification/Data/test/test_rep.pt")
  y_test = torch.load("/scratch/gd2574/AudioSet-classification/Data/test/labels.pt")


In [3]:
def create_stratified_split(x_data, y_data, random_state=42):
    mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    indices = np.arange(len(y_data))
    for train_idx, val_idx in mskf.split(indices, y_data):
        return train_idx, val_idx

In [4]:
def calculate_map(y_true, y_pred):
    n_classes = y_true.shape[1]
    average_precisions = []
    for i in range(n_classes):
        y_true_class = y_true[:, i]
        y_pred_class = y_pred[:, i]
        ap = average_precision_score(y_true_class, y_pred_class)
        average_precisions.append(ap)
    average_precisions_sorted = sorted(average_precisions, reverse=True)
    print("\nTop 5 class-wise Average Precisions:")
    for i, ap in enumerate(average_precisions_sorted[:5]):
        print(f"Class {i+1}: {ap:.4f}")
    
    return np.mean(average_precisions)

In [5]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    print("Model being trained: ", model.__class__.__name__)
    model = model.to(device)
    best_map = 0
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_predictions = []
        val_targets = []
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.float().to(device)
                batch_y = batch_y.float().to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                val_predictions.extend(outputs.cpu().numpy())
                val_targets.extend(batch_y.cpu().numpy())
        
        # lists -> numpy arrays for metric calculation
        val_predictions = np.array(val_predictions)
        val_targets = np.array(val_targets)
        val_map = calculate_map(val_targets, val_predictions)
        val_f1 = f1_score(val_targets, (val_predictions > 0.5).astype(float), average='micro')
        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Training Loss: {train_loss/len(train_loader):.4f}')
        print(f'Validation Loss: {val_loss/len(val_loader):.4f}')
        print(f'Validation MAP: {val_map:.4f}')
        print(f'Validation F1-Score: {val_f1:.4f}')
        
        # Save best model based on MAP
        if val_map > best_map:
            best_map = val_map
            model_name = model.__class__.__name__ 
            filename = f"best_{model_name}_map_lastlayer.pth"
            torch.save(model.state_dict(), filename)
            print(f"New best model saved as {filename}!")
        
        print('-' * 50)
    return model, best_map  

In [6]:
def test_model(model, device='cuda'):
    print("Model being tested: ", model.__class__.__name__)
    test_dataset = TensorDataset(x_test.float(), y_test.float())
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    model.load_state_dict(torch.load("best_CNNClassifier_map_lastlayer.pth"))
    model.eval()
    
    test_predictions = []
    test_targets = []
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.float().to(device)
            outputs = model(batch_x)
            test_predictions.extend(outputs.cpu().numpy())
            test_targets.extend(batch_y.cpu().numpy())
    
    test_predictions = np.array(test_predictions)
    test_targets = np.array(test_targets)
    
    test_map = calculate_map(test_targets, test_predictions)
    test_f1 = f1_score(test_targets, (test_predictions > 0.5).astype(float), average="micro")
    
    print(f"Test MAP: {test_map:.4f}")
    print(f"Test F1-Score: {test_f1:.4f}")
    
    return test_map, test_f1

In [7]:
class CNNClassifier(nn.Module):
    def __init__(self, input_channels=768, num_classes=527):
        super(CNNClassifier, self).__init__()
        
        # First Convolutional Block
        self.conv1 = nn.Sequential(
            nn.Conv1d(input_channels, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )   
        # Second Convolutional Block
        self.conv2 = nn.Sequential(
            nn.Conv1d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        # Third Convolutional Block
        self.conv3 = nn.Sequential(
            nn.Conv1d(128, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        self.flatten_size = 64 * (496 // (2*2*2))
        
        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(self.flatten_size, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x

In [8]:
batch_size = 32
learning_rate= 0.001
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x_train_float = x_train.float()
y_train_float = y_train.float()
full_dataset = TensorDataset(x_train_float, y_train_float)
total_size = len(full_dataset)

train_indices, val_indices = create_stratified_split(x_train_float, y_train_float.numpy())

train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

criterion = nn.BCELoss()

In [9]:
model = CNNClassifier()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model, best_val_map = train_model( model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

Model being trained:  CNNClassifier

Top 5 class-wise Average Precisions:
Class 1: 0.9447
Class 2: 0.8276
Class 3: 0.8101
Class 4: 0.7707
Class 5: 0.7667

Epoch 1/10:
Training Loss: 0.0285
Validation Loss: 0.0187
Validation MAP: 0.1283
Validation F1-Score: 0.2761
New best model saved as best_CNNClassifier_map_lastlayer.pth!
--------------------------------------------------

Top 5 class-wise Average Precisions:
Class 1: 0.9124
Class 2: 0.8931
Class 3: 0.8873
Class 4: 0.8872
Class 5: 0.8826

Epoch 2/10:
Training Loss: 0.0185
Validation Loss: 0.0157
Validation MAP: 0.2471
Validation F1-Score: 0.2950
New best model saved as best_CNNClassifier_map_lastlayer.pth!
--------------------------------------------------

Top 5 class-wise Average Precisions:
Class 1: 1.0000
Class 2: 0.9429
Class 3: 0.9310
Class 4: 0.9210
Class 5: 0.8838

Epoch 3/10:
Training Loss: 0.0161
Validation Loss: 0.0148
Validation MAP: 0.3029
Validation F1-Score: 0.3532
New best model saved as best_CNNClassifier_map_lastlay

In [10]:
test_map, test_f1 = test_model(model, device)

Model being tested:  CNNClassifier


  model.load_state_dict(torch.load("best_CNNClassifier_map_lastlayer.pth"))



Top 5 class-wise Average Precisions:
Class 1: 0.9061
Class 2: 0.8894
Class 3: 0.8561
Class 4: 0.8466
Class 5: 0.8382
Test MAP: 0.3320
Test F1-Score: 0.4476
