In [31]:
# MODEL DEFINITION

from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import Sequential
from torch.nn import BatchNorm2d
from torch.nn import ELU
from torch import flatten

class MultiLabelCNN(Module):
    """Four-stage convolutional network that emits one raw logit per class.

    Every stage is Conv2d(3x3) -> BatchNorm2d -> ELU -> MaxPool2d. The first
    two stages pool with a 2x2 window, the last two with a 3x3 window. The
    head flattens the feature map and maps it through 256 -> 128 ->
    ``num_classes`` linear layers. No sigmoid/softmax is applied here, so the
    output is logits.

    The first linear layer takes exactly 256 flattened features, so the
    convolutional stack has to reduce its input to a 256 x 1 x 1 map. A
    single-channel 96 x 87 input does that:
    96x87 -> 47x42 -> 22x20 -> 6x6 -> 1x1.
    """

    def __init__(self, num_classes):
        """Create the convolutional stack and the linear head.

        Args:
            num_classes: Number of logits produced for each sample.
        """
        super().__init__()

        # (in_channels, out_channels, pooling window) for each stage, in order.
        stage_specs = (
            (1, 64, (2, 2)),
            (64, 128, (2, 2)),
            (128, 256, (3, 3)),
            (256, 256, (3, 3)),
        )

        stack = []
        for channels_in, channels_out, pool_window in stage_specs:
            stack.append(
                Conv2d(
                    in_channels=channels_in,
                    out_channels=channels_out,
                    kernel_size=(3, 3),
                )
            )
            stack.append(BatchNorm2d(channels_out))
            stack.append(ELU())
            stack.append(MaxPool2d(pool_window))
        self.hidden_layers = Sequential(*stack)

        # 256 input features == 256 channels x 1 x 1 spatial positions.
        head = [
            Linear(in_features=256, out_features=128),
            ELU(),
            Linear(in_features=128, out_features=num_classes),
        ]
        self.linear_layers = Sequential(*head)

    def forward(self, x):
        """Return the logits for a batch ``x`` of shape (batch, 1, H, W)."""
        feature_map = self.hidden_layers(x)
        # Collapse every dimension except the batch dimension.
        flat_features = flatten(feature_map, 1)
        return self.linear_layers(flat_features)


In [32]:
# compress
from utils import *
from sklearn.preprocessing import LabelEncoder
import os

# Show the working directory, since the data path below is relative to it.
print(os.getcwd())

# Root directory holding one sub-folder of audio clips per instrument.
curr = "../../../sample_audio_training/"

# Each folder name doubles as the class label of the files inside it.
folders = ['oboe', 'trumpet', 'violin']
files = []
labels = []

for folder in folders:
    folderPath = os.path.join(curr, folder)
    # os.listdir returns entries in arbitrary order; sort them so the
    # file/label lists (and the dataset built from them) repeat across runs.
    for filename in sorted(os.listdir(folderPath)):
        # Skip hidden entries such as macOS ".DS_Store"; they are not audio clips.
        if filename.startswith("."):
            continue
        file_path = os.path.join(folderPath, filename)
        # Keep files only and skip sub-directories. Note that os.path.isfile
        # follows symlinks, so a symlink pointing at a file is kept too.
        if os.path.isfile(file_path):
            files.append(file_path)
            labels.append(folder)

# Turn the folder-name labels into integer class ids.
label_encoder = LabelEncoder()
numeric_labels = label_encoder.fit_transform(labels)

# process_and_save_audio is provided by the project's utils module (star import).
process_and_save_audio(files=files, labels=numeric_labels, output_path="data.npz", sr=22050, add_noise=False)

/Users/tvalencia/umich/w24/engr100/project3/src/ML/CNN
3


In [33]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from typing import Tuple
import logging, sys
import numpy as np

def load_npz_file_with_condition(file_path, max_size: int):
    """Load a NumPy file, requesting a read-only memory map when it is large.

    Args:
        file_path: Path passed to ``os.path.getsize`` and ``np.load``.
        max_size: Threshold in bytes. A file strictly larger than this is
            loaded with ``mmap_mode="r"``; anything else is loaded normally.

    Returns:
        Whatever ``np.load`` returns for ``file_path``.
    """
    size_bytes = os.path.getsize(file_path)
    size_mb = size_bytes / (1024**2)

    # NOTE(review): allow_pickle=True permits unpickling of object arrays, which
    # can execute arbitrary code. Only point this at files you trust.
    if size_bytes <= max_size:
        logging.info(f"File size is {size_mb:.2f}MB. Loading normally.")
        return np.load(file_path, allow_pickle=True)

    # NOTE(review): NumPy appears to honour mmap_mode only for plain .npy files
    # and to ignore it for .npz archives; confirm before relying on this branch.
    logging.info(f"File size is {size_mb:.2f}MB. Using mmap_mode='r'.")
    return np.load(file_path, mmap_mode="r", allow_pickle=True)


class FrameDataset(Dataset):
    """Dataset over the frame arrays stored in an ``.npz`` archive.

    Every archive key containing ``"_data"`` is treated as one sample. The
    labels for a sample are read from the key ``"<prefix>_labels"``, where
    ``<prefix>`` is the part of the sample key before ``"_data_"``.
    """

    def __init__(self, npz_path):
        """Open the archive at ``npz_path`` and collect its sample keys."""
        # Archives larger than 1 GiB are requested in memory-mapped mode.
        self.data = load_npz_file_with_condition(npz_path, max_size=1024**3)
        self.keys = []
        for name in self.data.keys():
            if "_data" in name:
                self.keys.append(name)

    def __len__(self):
        """Return the number of samples (one per collected key)."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Return ``(frame, labels)`` for sample ``idx`` as float32 tensors.

        The frame is reshaped to (1, 96, 87): a single channel followed by a
        96 x 87 matrix, which is the input layout used for the CNN.
        """
        frame_key = self.keys[idx]
        frame = self.data[frame_key]
        prefix = frame_key.split("_data_")[0]
        target = self.data[f"{prefix}_labels"]

        frame_tensor = torch.tensor(frame.reshape(1, 96, 87), dtype=torch.float32)
        target_tensor = torch.tensor(target, dtype=torch.float32)
        return frame_tensor, target_tensor
    
def _count_correct(outputs, labels) -> int:
    """Return how many thresholded predictions equal their label element-wise."""
    # Sigmoid turns logits into probabilities; 0.5 is the decision threshold.
    # detach() keeps the metric computation out of the autograd graph.
    predicted = torch.sigmoid(outputs.detach()) > 0.5
    return int((predicted == labels).sum().item())


def _accuracy(correct: int, total: int) -> float:
    """Return ``correct / total``, or NaN when no element was evaluated."""
    return correct / total if total else float("nan")


def train_model(
    model, train_dataloader, validation_dataloader, criterion, optimizer, epochs=5
) -> Tuple[list, list]:
    """Train ``model`` and record element-wise accuracy after every epoch.

    Each epoch runs one optimisation pass over ``train_dataloader`` followed by
    a gradient-free pass over ``validation_dataloader``. Accuracy is the
    fraction of label elements whose thresholded prediction
    (``sigmoid(logit) > 0.5``) matches the label.

    Args:
        model: Module mapping a batch of inputs to logits; it is switched
            between ``train()`` and ``eval()`` mode by this function.
        train_dataloader: Iterable of ``(data, labels)`` training batches.
        validation_dataloader: Iterable of ``(data, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        epochs: Number of epochs to run.

    Returns:
        ``(train_accuracies, validation_accuracies)``: two lists of Python
        floats with one entry per epoch. An epoch whose loader yielded no
        label elements records ``nan`` instead of raising ZeroDivisionError.
    """
    train_accuracies = []
    validation_accuracies = []

    for epoch in range(epochs):
        logging.info(f"Epoch {epoch+1}")

        # ---- Training phase ----
        model.train()
        total_loss = 0
        # Plain ints keep the counts exact no matter how large the dataset is.
        correct_predictions = 0
        total_predictions = 0
        for data, labels in train_dataloader:
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct_predictions += _count_correct(outputs, labels)
            total_predictions += torch.numel(labels)

        train_accuracy = _accuracy(correct_predictions, total_predictions)
        # total_loss is the sum of the per-batch losses, not their mean.
        logging.info(f"Loss: {total_loss}")
        logging.info(f"Train Accuracy: {train_accuracy}")
        train_accuracies.append(train_accuracy)

        # ---- Validation phase ----
        model.eval()
        correct_predictions = 0
        total_predictions = 0
        with torch.no_grad():
            for data, labels in validation_dataloader:
                outputs = model(data)
                correct_predictions += _count_correct(outputs, labels)
                total_predictions += torch.numel(labels)

        validation_accuracy = _accuracy(correct_predictions, total_predictions)
        logging.info(f"Validation Accuracy: {validation_accuracy}")
        validation_accuracies.append(validation_accuracy)

    return train_accuracies, validation_accuracies


In [34]:
from torch.nn import BCEWithLogitsLoss
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

# train_model reports its progress through logging.info. The root logger
# defaults to WARNING, which would discard those records, so send INFO
# records to the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
logging.getLogger().setLevel(logging.INFO)

# Seed torch so the split, weight initialisation and batch order repeat across runs.
SEED = 42
torch.manual_seed(SEED)

# load data and make dataset
npz_path = "data.npz"
mel_dataset = FrameDataset(npz_path)

print("split dataset")
# split dataset into training, validation, and testing
# sizes of 80% 10% and 10% (the test split absorbs the rounding remainder)
train_size = int(0.8 * len(mel_dataset))
validation_size = int(0.1 * len(mel_dataset))
test_size = len(mel_dataset) - train_size - validation_size

# split data sets into their respective sizes; the dedicated generator keeps
# the split stable even if other code consumes the global RNG first
train_dataset, validation_dataset, test_dataset = random_split(
    mel_dataset,
    [train_size, validation_size, test_size],
    generator=torch.Generator().manual_seed(SEED),
)

print("create dataloaders")
# create dataloaders; only the training batches need shuffling, since evaluation
# metrics do not depend on batch order
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
validation_dataloader = DataLoader(
    validation_dataset, batch_size=32, shuffle=False
)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("make model")
# make model
num_classes = 3  # one output logit per instrument class
model = MultiLabelCNN(num_classes)
# NOTE(review): BCEWithLogitsLoss needs targets shaped like the model output,
# so this assumes every stored label array has num_classes entries.
# TODO confirm against process_and_save_audio.
criterion = BCEWithLogitsLoss() # loss function
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9) # optimizer

print("training")
train_accuracies, validation_accuracies = train_model(
    model, train_dataloader, validation_dataloader, criterion, optimizer, epochs=20
)

KeyboardInterrupt: 