In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

In [34]:
class MultiLabelMLP(nn.Module):
    """Feed-forward network that emits one raw logit per label.

    Every hidden layer is a ``Linear`` followed by ReLU. The output layer has
    no activation, so the result is meant for a loss that applies the sigmoid
    itself (for example ``nn.BCEWithLogitsLoss``).
    """

    def __init__(self, input_size, hidden_sizes, num_classes):
        """
        Builds the stack of linear layers.

        Parameters:
        - input_size: The size of the input features.
        - hidden_sizes: A sequence with the width of each hidden layer. It may
          be empty, in which case the model is a single linear layer mapping
          input_size directly to num_classes.
        - num_classes: The number of classes (output size).
        """
        super().__init__()

        # Consecutive entries of `widths` are the (in, out) sizes of each
        # hidden layer; with no hidden layers the list is just [input_size].
        widths = [input_size, *hidden_sizes]
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

        # The output layer is fed by the last hidden layer, or by the input
        # itself when there are no hidden layers.
        self.output_layer = nn.Linear(widths[-1], num_classes)

    def forward(self, x):
        """Returns the logits for x; the last dimension of x must equal input_size."""
        # Apply each hidden layer with ReLU activation
        for layer in self.hidden_layers:
            x = F.relu(layer(x))

        # No activation here: the caller receives raw logits
        return self.output_layer(x)

In [37]:
def load_npz_file_with_condition(file_path, max_size: int = 1024**3):
    """
    Loads an .npz file, requesting mmap_mode='r' when the file is larger than max_size.

    Parameters:
    - file_path: The path to the .npz file.
    - max_size: Size threshold in bytes. Files strictly larger than this are
      opened with mmap_mode='r'. Defaults to 1 GiB (1024**3).

    Returns:
    - Whatever np.load returns for the file; the callers in this notebook use
      it as a dictionary-like object keyed by array name.

    Raises:
    - OSError: from os.path.getsize when the file does not exist or is inaccessible.
    """
    file_size = os.path.getsize(file_path)
    size_mb = file_size / (1024**2)

    # SECURITY: allow_pickle=True lets np.load unpickle object arrays, which can
    # execute arbitrary code. Only point this at archives you produced yourself.
    if file_size > max_size:
        print(f"File size is {size_mb:.2f}MB. Using mmap_mode='r'.")
        # NOTE(review): NumPy documents mmap_mode for .npy files; members of an
        # .npz archive are read on access regardless, so this flag may have no
        # effect here - confirm against the numpy.load documentation.
        return np.load(file_path, mmap_mode="r", allow_pickle=True)

    print(f"File size is {size_mb:.2f}MB. Loading normally.")
    return np.load(file_path, allow_pickle=True)

# NOTE: MultiLabelMLP must already be defined (see the model cell above) before this cell is run.

class FrameDataset(Dataset):
    """Dataset over the arrays stored in an .npz archive.

    A key of the form ``<prefix>_data_<suffix>`` is one sample; its target is
    the array stored under ``<prefix>_labels``. Keys that do not contain the
    ``_data_`` marker are ignored.
    """

    # Single source of truth for the key layout: the same marker selects the
    # sample keys and is used to recover the prefix of the labels key. Using
    # two different markers would admit keys (e.g. "meta_data") whose labels
    # key cannot be derived.
    _DATA_MARKER = "_data_"

    def __init__(self, npz_path):
        """Opens the archive at npz_path and indexes its sample keys."""
        self.data = load_npz_file_with_condition(npz_path, max_size = 1024**3)
        self.keys = [k for k in self.data.keys() if self._DATA_MARKER in k]

    def __len__(self):
        """Number of sample keys found in the archive."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Returns (features, labels) for sample idx as float32 tensors.

        The features are the stored array flattened to one dimension; the
        labels are returned in their stored shape.
        """
        data_key = self.keys[idx]
        # Everything before the first marker identifies the labels entry
        prefix = data_key.partition(self._DATA_MARKER)[0]
        data = self.data[data_key]
        labels = self.data[f'{prefix}_labels']
        # Assuming data is already normalized and just needs to be reshaped/flattened
        return torch.tensor(data.reshape(-1), dtype=torch.float32), torch.tensor(labels, dtype=torch.float32)

def train_model(model, dataloader, criterion, optimizer, epochs=5):
    """
    Trains model for a number of epochs and prints one summary line per epoch.

    Parameters:
    - model: The nn.Module to optimize. It is switched to training mode.
    - dataloader: An iterable yielding (data, labels) batches.
    - criterion: Loss taking (outputs, labels); outputs are treated as logits.
    - optimizer: Optimizer holding the model's parameters.
    - epochs: Number of passes over dataloader.

    The printed loss is the sum of the per-batch losses of the epoch. The
    printed accuracy is the fraction of individual label entries for which
    sigmoid(output) > 0.5 agrees with the label; it is nan when the
    dataloader yields no batches.
    """
    # Make sure layers such as dropout / batch norm behave as in training,
    # even if the model was left in eval mode by an earlier cell.
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        for data, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            # Metric bookkeeping needs no gradients
            with torch.no_grad():
                predicted = torch.sigmoid(outputs) > 0.5  # Apply sigmoid and threshold
                # Count with exact integers; a float32 running sum stops
                # being exact once the count exceeds 2**24.
                correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.numel()

        # An empty dataloader has no defined accuracy; report nan instead of
        # dividing by zero.
        accuracy = correct_predictions / total_predictions if total_predictions else float("nan")
        print(f'Epoch {epoch+1}, Loss: {total_loss}, Accuracy: {accuracy}')

npz_path = 'processed_audio.npz'

# Seed torch's global RNG before the shuffling DataLoader and the model are
# created, so batch order and weight initialization repeat across
# Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

dataset = FrameDataset(npz_path)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Define model, criterion, and optimizer
input_size = 96 * 87  # Flattened frame size
hidden_sizes = [256, 256, 256, 256]  # Example hidden layer sizes
num_classes = 3  # Adjust based on your label dimensionality

# Fail fast with a readable message if the hand-set sizes above do not match
# the archive, instead of hitting a shape error inside the training loop.
assert len(dataset) > 0, f"no samples found in {npz_path}"
first_features, first_labels = dataset[0]
assert first_features.numel() == input_size, (
    f"input_size={input_size} but a flattened frame has {first_features.numel()} values"
)
assert first_labels.numel() == num_classes, (
    f"num_classes={num_classes} but a label vector has {first_labels.numel()} values"
)

model = MultiLabelMLP(input_size, hidden_sizes, num_classes)
# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

train_model(model, dataloader, criterion, optimizer, epochs=50)

File size is 2.65MB. Loading normally.
Epoch 1, Loss: 25.712136805057526, Accuracy: 0.682170569896698
Epoch 2, Loss: 15.753159284591675, Accuracy: 0.838178277015686
Epoch 3, Loss: 12.467473968863487, Accuracy: 0.856589138507843
Epoch 4, Loss: 10.571014881134033, Accuracy: 0.873062014579773
Epoch 5, Loss: 11.53198317438364, Accuracy: 0.8614341020584106
Epoch 6, Loss: 10.268832549452782, Accuracy: 0.8740310072898865
Epoch 7, Loss: 9.685718178749084, Accuracy: 0.8817829489707947
Epoch 8, Loss: 9.165742952376604, Accuracy: 0.8817829489707947
Epoch 9, Loss: 9.622019652277231, Accuracy: 0.8808139562606812
Epoch 10, Loss: 8.617437899112701, Accuracy: 0.8963178396224976
Epoch 11, Loss: 8.67357673496008, Accuracy: 0.8943798542022705
Epoch 12, Loss: 8.640702792443335, Accuracy: 0.8924418687820435
Epoch 13, Loss: 8.38176771812141, Accuracy: 0.8875969052314758
Epoch 14, Loss: 8.125590207986534, Accuracy: 0.8992248177528381
Epoch 15, Loss: 7.955198008567095, Accuracy: 0.9050387740135193
Epoch 16, L