In [210]:
import matplotlib.pyplot as plt
import torch.optim as optim
import torch
import subprocess

from torch.utils.data import TensorDataset, DataLoader
from scipy.io import loadmat
from tqdm import tqdm
from torch import nn

In [211]:
# Load the training and validation sets from the MATLAB .mat files.
dataset_train = loadmat("training_data.mat")
dataset_validation = loadmat("validation_data.mat")

# Extract the feature matrices and the integer labels. Labels are flattened to
# 1-D so each sample carries a single integer target.
x_train = dataset_train["features_train"]
y_train = dataset_train["labels_train_int"].flatten()
x_val = dataset_validation["features_validation"]
y_val = dataset_validation["labels_validation_int"].flatten()

# Per-feature standardisation statistics, computed on the training set only
# and reused for every other split.
x_train_mean = x_train.mean(axis=0, keepdims=True)
x_train_std = x_train.std(axis=0, keepdims=True)

# A feature that is constant over the training set has zero standard deviation;
# dividing by it would fill that column with NaN/inf. Use a unit scale for such
# columns so they simply become zero after centring.
x_train_std[x_train_std == 0] = 1.0

x_train_norm = (x_train - x_train_mean) / x_train_std
x_val_norm = (x_val - x_train_mean) / x_train_std

# Convert the arrays to tensors: float32 features, int64 (long) labels.
x_train_tensor = torch.tensor(x_train_norm, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

x_val_tensor = torch.tensor(x_val_norm, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Pair features with labels so a DataLoader can batch them together.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

In [212]:
class Net(nn.Sequential):
    """Fully connected classifier built as a flat ``nn.Sequential``.

    Every hidden width contributes the stack
    ``Linear -> BatchNorm1d -> [Dropout] -> activation`` and a final ``Linear``
    maps the last hidden width to ``out_dim``.

    Args:
        input_dim: Number of input features.
        hidden_dim: Sequence of hidden layer widths; must not be empty.
        activation: Activation module. The same instance is inserted after
            every hidden layer.
        out_dim: Number of outputs of the final linear layer.
        dropout: Dropout probability.

    Note:
        The first hidden block only receives a ``Dropout`` module when
        ``dropout > 0``, whereas every later block always receives one (even
        with probability 0). This asymmetry determines the position of each
        module inside the sequence, so it is kept as is.
    """

    def __init__(
        self,
        input_dim=85,
        hidden_dim=[
            4,
        ],
        activation=nn.ReLU(),
        out_dim=10,
        dropout=0.0,
    ):
        assert len(hidden_dim) > 0, "at least one hidden layer"

        # Consecutive pairs of this list are the (in, out) sizes of the
        # hidden linear layers.
        widths = [input_dim, *hidden_dim]
        modules = []

        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.BatchNorm1d(fan_out))

            # First block: dropout is optional. Later blocks: always present.
            if position > 0 or dropout > 0.0:
                modules.append(nn.Dropout(dropout))

            modules.append(activation)

        # Output projection.
        modules.append(nn.Linear(widths[-1], out_dim))
        super().__init__(*modules)

# Training loop

In [213]:
def train_epoch(network, loss_fn, dataloader, optimizer, device="cpu"):
    """Run one optimisation pass over ``dataloader`` and report epoch metrics.

    Args:
        network: Model to optimise; it is moved to ``device`` and switched to
            training mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor that supports ``backward()``.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer holding the parameters of ``network``.
        device: Device on which the computation runs.

    Returns:
        Tuple ``(average_loss, accuracy)``: the mean of the per-batch loss
        values (each batch counts equally, whatever its size) and the fraction
        of samples whose highest-scoring output index equals the target.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    network.to(device)
    network.train()

    # Running totals for the epoch.
    loss_sum = 0.0
    batch_count = 0
    hits = 0.0
    seen = 0

    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = network(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        batch_count += 1

        # Predicted class = index of the largest output along dim 1.
        guesses = logits.max(dim=1).indices
        hits += (guesses == targets).sum().item()
        seen += targets.size(0)

    return loss_sum / batch_count, hits / seen

In [214]:
def validate_epoch(network, loss_fn, dataloader, device="cpu"):
    """Evaluate ``network`` on ``dataloader`` without updating its parameters.

    Args:
        network: Model to evaluate; it is moved to ``device`` and switched to
            evaluation mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device on which the computation runs.

    Returns:
        Tuple ``(average_loss, accuracy)``: the mean of the per-batch loss
        values (each batch counts equally, whatever its size) and the fraction
        of samples whose highest-scoring output index equals the target.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    network.to(device)
    network.eval()

    # Running totals for the epoch.
    loss_sum = 0.0
    batch_count = 0
    hits = 0.0
    seen = 0

    # Gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            logits = network(inputs)
            loss_sum += loss_fn(logits, targets).item()
            batch_count += 1

            # Predicted class = index of the largest output along dim 1.
            guesses = logits.max(dim=1).indices
            hits += (guesses == targets).sum().item()
            seen += targets.size(0)

    return loss_sum / batch_count, hits / seen

In [215]:
def train(
    network,
    loss_fn,
    train_dataloader,
    val_dataloader,
    optimizer,
    num_epochs,
    device="cpu",
):
    """Train ``network`` for ``num_epochs`` epochs, validating after each one.

    Each epoch calls ``train_epoch`` on ``train_dataloader`` and then
    ``validate_epoch`` on ``val_dataloader``; the latest metrics are shown in
    the tqdm progress bar.

    Args:
        network: Model to train.
        loss_fn: Loss callable forwarded to both epoch functions.
        train_dataloader: Batches used for the optimisation pass.
        val_dataloader: Batches used for the evaluation pass.
        optimizer: Optimizer forwarded to ``train_epoch``.
        num_epochs: Number of epochs to run.
        device: Device forwarded to both epoch functions.

    Returns:
        Tuple ``(train_losses, val_losses, train_acc, val_acc)`` of lists with
        one entry per epoch.
    """
    history = {name: [] for name in ("train_loss", "val_loss", "train_acc", "val_acc")}

    epoch_bar = tqdm(range(num_epochs), desc="Training Progress")

    for _ in epoch_bar:
        # Optimisation pass followed by the evaluation pass.
        epoch_train_loss, epoch_train_acc = train_epoch(
            network, loss_fn, train_dataloader, optimizer, device=device
        )
        epoch_val_loss, epoch_val_acc = validate_epoch(
            network, loss_fn, val_dataloader, device=device
        )

        history["train_loss"].append(epoch_train_loss)
        history["train_acc"].append(epoch_train_acc)
        history["val_loss"].append(epoch_val_loss)
        history["val_acc"].append(epoch_val_acc)

        # Show the most recent metrics next to the progress bar.
        epoch_bar.set_postfix(
            {
                "Train Loss": epoch_train_loss,
                "Val Loss": epoch_val_loss,
                "Train Acc": epoch_train_acc,
                "Val Acc": epoch_val_acc,
            }
        )

    return (
        history["train_loss"],
        history["val_loss"],
        history["train_acc"],
        history["val_acc"],
    )

In [216]:
# # Define training hyperparameters
# learning_rate = 0.03
# batch_size = 128
# num_epochs = 100

# # Define network
# net = Net(hidden_dim=[128, 128], dropout=0.25, activation=nn.ELU())

# # Create SGD optimizer with momentum (Adam is kept below as an alternative)
# optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)
# # optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# loss_fn = nn.CrossEntropyLoss()


# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# train_losses, val_losses, train_acc, val_acc = train(
#     net,
#     loss_fn,
#     train_dataloader,
#     val_dataloader,
#     optimizer,
#     num_epochs,
#     device="cpu",
# )

# plt.plot(train_losses, label="train loss")
# plt.plot(val_losses, label="val_loss")
# plt.legend()
# plt.xlabel("epoch")
# plt.ylabel("Loss")
# plt.show()

# fig, ax = plt.subplots()
# ax.plot(train_acc, label="train acc")
# ax.plot(val_acc, label="val acc")
# ax.legend()
# ax.set_xlabel("epoch")
# ax.set_ylabel("accuracy")
# plt.show()

# print(
#     f"Best validation accuracy: {max(val_acc) * 100:.2f}% in epoch {val_acc.index(max(val_acc)) + 1}"
# )

# # Save the model
# torch.save(net.state_dict(), "model.pth")

In [217]:
MATLAB_PATH = "/Applications/MATLAB_R2024a.app/bin/matlab"
AUDIO_FILE = "tres_2_3.wav"

# The MATLAB invocation below is currently disabled; this cell reads whatever
# descriptor.mat is already on disk.
# NOTE(review): if it is re-enabled, pass the command as an argument list
# without shell=True so the file name is never interpreted by a shell.
# command = f"{MATLAB_PATH} -batch \"extract_features('{AUDIO_FILE}')\""
# subprocess.run(command, shell=True)

# Load the descriptor and standardise it with the training-set statistics,
# i.e. the same transform that was applied to the training features.
descriptor = loadmat("descriptor.mat")["descriptor"]
descriptor = (descriptor - x_train_mean) / x_train_std

testset = TensorDataset(torch.tensor(descriptor, dtype=torch.float32))
testloader = DataLoader(testset, batch_size=1, shuffle=False)

# Same architecture as the (commented-out) training cell that writes model.pth.
net = Net(hidden_dim=[128, 128], dropout=0.25, activation=nn.ELU())
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code; map_location="cpu" lets a
# checkpoint saved from a GPU load on a CPU-only machine.
net.load_state_dict(torch.load("model.pth", map_location="cpu", weights_only=True))
net.eval()

# Classify every row of the descriptor, not just the last one.
predictions = []
with torch.no_grad():
    for (features,) in testloader:
        outputs = net(features)
        _, predicted = torch.max(outputs, 1)
        predictions.append(predicted.item())

# Fail with a clear message instead of a NameError when there is nothing to classify.
if not predictions:
    raise ValueError("descriptor.mat contains no feature rows to classify")

for prediction in predictions:
    print(f"Predicted digit: {prediction}")

Predicted digit: 3
