### Homsiang - MFCC

Step 1: Define Helper Functions for Segmenting and Padding

In [1]:
### load data and labels
### mfcc has various lengths 
import pandas as pd
import numpy as np

def segment_mfcc(mfcc, max_segment_length=200):
    # Segment the MFCC array into fixed lengths with possible overlap (if desired)
    # segment_length is the fixed length of each segment
    segments = []
    for start in range(0, mfcc.shape[1], max_segment_length):
        end = start + max_segment_length
        if end < mfcc.shape[1]:
            segments.append(mfcc[:, start:end])
        else:
            # Padding the last segment if it's shorter than the required segment length
            segments.append(np.pad(mfcc[:, start:], ((0,0), (0, max_segment_length - (mfcc.shape[1] - start))), 'constant'))
    return segments

def load_concatenated_mfcc(path):
    
    # Load the concatenated MFCC data
    data = np.load(path)
    return data['mfcc']

df = pd.read_csv('datasets/DAIC-WOZ/Patient_Classes.csv')

# for _, row in df.iterrows():
#     patient_id = row['Participant_ID']
#     label = row['PHQ8_Binary']
#     train_or_test = row['dataset'] # test or dev
#     mfcc_path = 'datasets/DAIC-WOZ/ConcatenatedMFCC/concatenated_mfcc_'+str(patient_id)+'.npz'
#     try:
#         mfcc = load_concatenated_mfcc(mfcc_path) # the raw mfcc data
#         # make a dataset to be able to train the CNN

#     except Exception as e:
#         print(e)
#         continue

def create_datasets(df, max_segment_length=500):
    dataset = {'train': [], 'test': []}
    labels = {'train': [], 'test': []}

    for _, row in df.iterrows():
        patient_id = row['Participant_ID']
        label = row['PHQ8_Binary']
        print(patient_id, label)
        train_or_test = row['dataset']  # Could be 'train' or 'test'
        # if train_or_test is 'dev' change it to 'test'
        if train_or_test == 'dev':
            train_or_test = 'test'

        mfcc_path = f'datasets/DAIC-WOZ/ConcatenatedMFCC/concatenated_mfcc_{patient_id}.npz'

        try:
            mfcc = load_concatenated_mfcc(mfcc_path)  # Load the raw MFCC data
            segments = segment_mfcc(mfcc, max_segment_length=max_segment_length)
            
            # Append each segment to the corresponding dataset
            for segment in segments:
                dataset[train_or_test].append(segment)
                labels[train_or_test].append(label)
        except Exception as e:
            print(f"Failed to process patient {patient_id}: {e}")

    return dataset, labels


Step 2: Initialize DataLoaders
python
Copy code


In [6]:
import torch

In [3]:
from torch.utils.data import TensorDataset, DataLoader, Dataset

def prepare_dataloaders(dataset, labels, batch_size=32):
    dataloaders = {}
    for phase in ['train', 'test']:
        features = torch.tensor(dataset[phase]).float()  # Convert features to float tensors
        targets = torch.tensor(labels[phase]).long()  # Convert labels to long tensors

        # Reshape for Conv1D: [batch, channels, length]
        features = features.permute(0, 1, 2)

        data_set = TensorDataset(features, targets)
        dataloaders[phase] = DataLoader(data_set, batch_size=batch_size, shuffle=(phase == 'train'))
    
    return dataloaders

SEQMENT_LENGTH = 50
# Assuming you have already loaded and segmented the data
dataset, labels = create_datasets(df, max_segment_length=SEQMENT_LENGTH) 
# dataloaders = prepare_dataloaders(dataset, labels)

303 0
304 0
305 0
310 0
312 0
313 0
315 0
316 0
317 0
318 0
319 1
320 1
321 1
322 0
324 0
325 1
326 0
327 0
328 0
330 1
333 0
336 0
338 1
339 1
340 0
341 0
343 0
344 1
345 1
347 1
348 1
350 1
351 1
352 1
353 1
355 1
356 1
357 0
358 0
360 0
362 1
363 0
364 0
366 0
368 0
369 0
370 0
371 0
372 1
374 0
375 0
376 1
379 0
380 1
383 0
385 0
386 1
391 0
392 0
393 0
397 0
400 0
401 0
402 1
409 0
412 1
414 1
415 0
416 0
419 0
423 0
425 0
426 1
427 0
428 0
429 0
430 0
433 1
434 0
437 0
441 1
443 0
444 0
445 0
446 0
447 0
448 1
449 0
454 0
455 0
456 0
457 0
459 1
463 0
464 0
468 0
471 0
473 0
474 0
475 0
478 0
479 0
485 0
486 0
487 0
488 0
491 0
Failed to process patient 491: [Errno 2] No such file or directory: 'datasets/DAIC-WOZ/ConcatenatedMFCC/concatenated_mfcc_491.npz'
302 0
307 0
331 0
335 1
346 1
367 1
377 1
381 1
382 0
388 1
389 1
390 0
395 0
403 0
404 0
406 0
413 1
417 0
418 1
420 0
422 1
436 0
439 0
440 1
451 0
458 0
472 0
476 0
477 0
482 0
483 1
484 0
489 0
Failed to process patient 489

In [7]:
class AudioDataset(Dataset):
    def __init__(self, mfcc_features, labels):
        self.features = torch.FloatTensor(mfcc_features)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.labels)
        
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [8]:
dataset_train = AudioDataset(dataset["train"], labels["test"])
dataset_val = AudioDataset(dataset["test"], labels["test"])

  self.features = torch.FloatTensor(mfcc_features)


In [9]:
import torch.nn as nn

class DepressionDetector1DCNN(nn.Module):
    def __init__(self, input_features=40, sequence_length=200):
        super(DepressionDetector1DCNN, self).__init__()
        
        self.conv_layers = nn.Sequential(
            nn.Conv1d(input_features, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(64),
            
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(128),
            
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(256)
        )
        
        self.flatten = nn.Flatten()
        
        # Calculate the size after convolutions
        # After each MaxPool1d(2), the sequence length is halved
        final_sequence_length = sequence_length // (2 * 2 * 2)  # Three MaxPool layers
        self.dense_input_size = 256 * final_sequence_length
        
        self.dense_layers = nn.Sequential(
            nn.Linear(self.dense_input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.7), # add more
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5), # add more
            nn.Linear(64, 2)
        )
        
    def forward(self, x):
        # Input shape: [batch_size, features, sequence_length]
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.dense_layers(x)
        return x

In [10]:
def train_model(train_loader, val_loader, sequence_length=200, num_epochs=200, batch_size=16, learning_rate=0.001):
    # Initialize model, loss function, and optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DepressionDetector1DCNN(input_features=40, sequence_length=sequence_length).to(device)
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
    
    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
            
        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                outputs = model(features)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
        
        # Print epoch statistics
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}]')
            print(f'Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {100.*train_correct/train_total:.2f}%')
            print(f'Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {100.*val_correct/val_total:.2f}%\n')
    
    return model

In [34]:
# another CNN
class DepressionDetectorCNN(nn.Module):
    def __init__(self, input_features=40, sequence_length=200):
        super(DepressionDetectorCNN, self).__init__()
        
        # First conv block - keeping input channels smaller than paper
        self.conv1 = nn.Sequential(
            nn.Conv1d(input_features, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(32),
            nn.Dropout(0.2)  # Adding dropout after each block
        )
        
        # Second conv block
        self.conv2 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3)
        )
        
        # Third conv block
        self.conv3 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(128),
            nn.Dropout(0.4)
        )
        
        # Calculate the size after convolutions
        final_sequence_length = sequence_length // (2 * 2 * 2)  # Three MaxPool layers
        self.dense_input_size = 128 * final_sequence_length
        
        # Dense layers with increased dropout
        self.dense = nn.Sequential(
            nn.Linear(self.dense_input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 2)
        )
        
    def forward(self, x):
        # Input shape: [batch_size, features, sequence_length]
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.flatten(1)
        x = self.dense(x)
        return x

### Another full motherfucker

In [11]:
import torch
import torch.nn as nn
import numpy as np

class DepressionGCNN(nn.Module):
    def __init__(self, input_features=40, sequence_length=200, num_neighbors=9):
        super(DepressionGCNN, self).__init__()
        
        self.sequence_length = sequence_length
        self.num_neighbors = num_neighbors
        
        # Graph convolutional layers
        self.graph_conv1 = GraphConvLayer(input_features, 64)
        self.graph_conv2 = GraphConvLayer(64, 128)
        self.graph_conv3 = GraphConvLayer(128, 256)
        self.graph_conv4 = GraphConvLayer(256, 512)
        
        # Calculate flattened size based on sequence length
        self.flatten_size = 512 * (sequence_length // 16)  # Due to pooling in graph conv layers
        
        # Dense layers with dropouts
        self.dense = nn.Sequential(
            nn.Linear(self.flatten_size, 4096),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, 2)
        )
        
    def forward(self, x, similarity_graphs):
        # x shape: [batch_size, input_features, sequence_length]
        x = self.graph_conv1(x)
        x = self.graph_conv2(x)
        x = self.graph_conv3(x)
        x = self.graph_conv4(x)
        
        # Flatten
        x = x.reshape(x.size(0), -1)
        
        # Dense layers
        x = self.dense(x)
        return x

class GraphConvLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GraphConvLayer, self).__init__()
        
        self.conv = nn.Sequential(
            nn.Conv1d(in_features, out_features, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(out_features)
        )
        
    def forward(self, x):
        # x shape: [batch_size, in_features, sequence_length]
        x = self.conv(x)
        return x

def calculate_similarity_graphs(features, num_neighbors=9):
    """
    Calculate similarity graphs between feature dimensions
    features: [batch_size, features, sequence_length]
    """
    # Average across sequence length to get feature correlations
    features_mean = features.mean(dim=-1)  # [batch_size, features]
    
    num_features = features_mean.shape[1]
    similarity_matrix = np.zeros((num_features, num_features))
    
    # Calculate correlation coefficients
    features_np = features_mean.cpu().numpy()
    for i in range(num_features):
        for j in range(num_features):
            correlation = np.corrcoef(features_np[:, i], features_np[:, j])[0, 1]
            similarity_matrix[i, j] = abs(correlation)
    
    # For each feature, get top k similar features
    similarity_graphs = []
    for i in range(num_features):
        # Get indices of top k similar features (excluding self)
        similar_features = np.argsort(similarity_matrix[i])[-num_neighbors-1:-1]
        similarity_graphs.append(similar_features)
    
    return similarity_graphs

def train_model_three(model, train_loader, val_loader, num_epochs=200, learning_rate=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    # Calculate similarity graphs once before training
    features = next(iter(train_loader))[0]
    similarity_graphs = calculate_similarity_graphs(features)
    
    best_val_acc = 0
    patience = 20
    patience_counter = 0
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(features, similarity_graphs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
        
        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                outputs = model(features, similarity_graphs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
        
        val_acc = 100. * val_correct / val_total
        
        # Early stopping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}]')
            print(f'Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {100.*train_correct/train_total:.2f}%')
            print(f'Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {val_acc:.2f}%\n')
    
    return model

In [13]:
import torch.optim as optim

# Initialize the model with your specific dimensions
# first model 
# model = DepressionDetector1DCNN(input_features=40, sequence_length=SEQMENT_LENGTH)

# second model
# model = DepressionDetectorCNN(input_features=40, sequence_length=SEQMENT_LENGTH)

# third model

model = DepressionGCNN(input_features=40, sequence_length=SEQMENT_LENGTH)

BATCH_SIZE = 32

# Create data loaders
train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=BATCH_SIZE)

# Use the training function as before
# model = train_model(train_loader, train_loader, sequence_length=SEQMENT_LENGTH, num_epochs=200, batch_size=BATCH_SIZE, learning_rate=0.001)

model = train_model_three(model, train_loader, val_loader)

Epoch [10/200]
Train Loss: 0.3096, Train Acc: 85.84%
Val Loss: 1.5318, Val Acc: 50.53%

Epoch [20/200]
Train Loss: 0.1656, Train Acc: 93.12%
Val Loss: 2.2440, Val Acc: 53.77%

Epoch [30/200]
Train Loss: 0.0938, Train Acc: 96.41%
Val Loss: 2.9689, Val Acc: 50.83%

Early stopping triggered at epoch 40


### The model

In [17]:
class DepressionDetector1DCNN(nn.Module):
    def __init__(self, input_features=40):
        super(DepressionDetector1DCNN, self).__init__()
        
        self.conv_layers = nn.Sequential(
            nn.Conv1d(input_features, 64, kernel_size=3, padding=1), # 64 ->> 500
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(64),
            
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(128),
            
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(256)
        )
        
        self.flatten = nn.Flatten()
        
        # Calculate size after convolutions
        self.dense_layers = nn.Sequential(
            nn.Linear(256 * 8, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.dense_layers(x)
        return x

In [40]:
import torch
import torch.nn as nn
import torch.optim as optim

# Assuming your DepressionDetector1DCNN and other classes are defined as shown earlier

def train_model(dataloaders, num_epochs=200, learning_rate=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DepressionDetector1DCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            if (epoch + 1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}]')
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    return model

In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

class AudioDataset(Dataset):
    def __init__(self, mfcc_features, labels):
        self.features = torch.FloatTensor(mfcc_features)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.labels)
        
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

class DepressionDetector1DCNN(nn.Module):
    def __init__(self, input_features=40):
        super(DepressionDetector1DCNN, self).__init__()
        
        self.conv_layers = nn.Sequential(
            nn.Conv1d(input_features, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(64),
            
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(128),
            
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.BatchNorm1d(256)
        )
        
        self.flatten = nn.Flatten()
        
        # Calculate size after convolutions
        self.dense_layers = nn.Sequential(
            nn.Linear(256 * 8, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.dense_layers(x)
        return x

def train_model(mfcc_features_train, labels_train, mfcc_features_val, labels_val, 
                num_epochs=200, batch_size=16, learning_rate=0.001):
    
    # Create datasets and dataloaders
    train_dataset = AudioDataset(mfcc_features_train, labels_train)
    val_dataset = AudioDataset(mfcc_features_val, labels_val)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    # Initialize model, loss function, and optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DepressionDetector1DCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()
            
        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                outputs = model(features)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()
        
        # Print epoch statistics
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}]')
            print(f'Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Train Acc: {100.*train_correct/train_total:.2f}%')
            print(f'Val Loss: {val_loss/len(val_loader):.4f}, '
                  f'Val Acc: {100.*val_correct/val_total:.2f}%\n')
    
    return model

def evaluate_model(model, mfcc_features_test, labels_test, batch_size=16):
    test_dataset = AudioDataset(mfcc_features_test, labels_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    device = next(model.parameters()).device
    model.eval()
    
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            _, predicted = outputs.max(1)
            
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = 100. * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    
    return np.array(all_predictions), np.array(all_labels)




In [None]:
# Example usage:
if __name__ == "__main__":
    # Assuming your data is already preprocessed
    # mfcc_features shape: (num_samples, num_features, time_steps)
    # labels shape: (num_samples,)
    
    # Train the model
    model = train_model(mfcc_features_train, labels_train, 
                       mfcc_features_val, labels_val)
    
    # Evaluate
    predictions, true_labels = evaluate_model(model, mfcc_features_test, labels_test)