In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

# dynamic lmfcc

In [2]:
def _load_npz_array(path, key):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as
    soon as the array has been read, instead of lingering until garbage
    collection.
    """
    with np.load(path) as archive:
        return archive[key]


# Feature matrices of the "lmfcc ... dy" experiment, one per data split.
mfcc_train_x = _load_npz_array("lmfcc_train_x_dy.npz", "X")
mfcc_val_x   = _load_npz_array("lmfcc_val_x_dy.npz", "X")
mfcc_test_x  = _load_npz_array("lmfcc_test_x_dy.npz", "X")

# torch.as_tensor reuses the NumPy buffer when the dtype already matches,
# whereas torch.tensor always allocates a second full copy of the data.
train_x = torch.as_tensor(mfcc_train_x, dtype=torch.float32)
val_x   = torch.as_tensor(mfcc_val_x,   dtype=torch.float32)
test_x  = torch.as_tensor(mfcc_test_x,  dtype=torch.float32)

# Class labels for the same three splits, converted to int64 tensors.
train_y = torch.as_tensor(_load_npz_array("train_y_dy.npz", "y"), dtype=torch.long)
val_y   = torch.as_tensor(_load_npz_array("val_y_dy.npz", "y"),   dtype=torch.long)
test_y  = torch.as_tensor(_load_npz_array("test_y_dy.npz", "y"),  dtype=torch.long)

# Hyperparameters for this run.
input_dim = mfcc_train_x.shape[1]   # number of columns in the training matrix
hidden_dim = 256
# Class-index targets must be smaller than the number of network outputs, so
# size the output layer from the largest label rather than from the count of
# distinct labels (the two agree when the labels are exactly 0..K-1).
output_dim = int(train_y.max().item()) + 1
batch_size = 256
num_epochs = 30
learning_rate = 1e-3

class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input vector. Defaults to the notebook-level
            `input_dim` when omitted.
        hidden_features: width of both hidden layers. Defaults to the
            notebook-level `hidden_dim` when omitted.
        out_features: number of output scores. Defaults to the notebook-level
            `output_dim` when omitted.

    Calling `Net()` without arguments behaves exactly as before; the optional
    arguments let the same class be built for other feature sets without
    redefining it.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()
        # Globals are only looked up for the sizes the caller did not supply.
        if in_features is None:
            in_features = input_dim
        if hidden_features is None:
            hidden_features = hidden_dim
        if out_features is None:
            out_features = output_dim
        # Attribute names are kept so existing state_dict checkpoints still load.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw (unnormalised) output scores for the batch `x`."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model, loss and optimiser for this experiment.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(TensorDataset(val_x, val_y),     batch_size=batch_size)

checkpoint_path = "model_lmfcc_dy_2layer.pt"
best_val_loss = float('inf')

writer = SummaryWriter()
try:
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion yields a batch mean; weighting by batch size makes
            # the epoch value a per-sample mean even if the last batch is short.
            train_loss += loss.item() * inputs.size(0)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass (no gradients) ----
        net.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_loss /= len(val_loader.dataset)
        accuracy = correct / total

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {accuracy:.4f}")
        writer.add_scalars("Loss", {"Train": train_loss, "Val": val_loss}, epoch)
        writer.add_scalar("Accuracy/Val", accuracy, epoch)

        # Keep the weights of the epoch with the lowest validation loss, as
        # the mspec experiments in this notebook do, instead of whatever the
        # final epoch produced. All `num_epochs` epochs are still run.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(net.state_dict(), checkpoint_path)
finally:
    # Flush pending TensorBoard events and release the event file, even if
    # training is interrupted.
    writer.close()

if best_val_loss != float('inf'):
    # Bring the in-memory model in line with the checkpoint on disk.
    net.load_state_dict(torch.load(checkpoint_path))
else:
    # Validation loss never improved on +inf (e.g. it was NaN throughout):
    # fall back to saving the final weights, as the notebook did before.
    torch.save(net.state_dict(), checkpoint_path)


Epoch 1: Train Loss = 0.8192, Val Loss = 0.7196, Val Acc = 0.7609
Epoch 2: Train Loss = 0.6495, Val Loss = 0.6802, Val Acc = 0.7701
Epoch 3: Train Loss = 0.6011, Val Loss = 0.6476, Val Acc = 0.7846
Epoch 4: Train Loss = 0.5678, Val Loss = 0.6452, Val Acc = 0.7853
Epoch 5: Train Loss = 0.5490, Val Loss = 0.6478, Val Acc = 0.7860
Epoch 6: Train Loss = 0.5354, Val Loss = 0.6426, Val Acc = 0.7875
Epoch 7: Train Loss = 0.5256, Val Loss = 0.6360, Val Acc = 0.7911
Epoch 8: Train Loss = 0.5174, Val Loss = 0.6430, Val Acc = 0.7891
Epoch 9: Train Loss = 0.5104, Val Loss = 0.6505, Val Acc = 0.7886
Epoch 10: Train Loss = 0.5043, Val Loss = 0.6467, Val Acc = 0.7921
Epoch 11: Train Loss = 0.4994, Val Loss = 0.6474, Val Acc = 0.7907
Epoch 12: Train Loss = 0.4946, Val Loss = 0.6369, Val Acc = 0.7942
Epoch 13: Train Loss = 0.4907, Val Loss = 0.6445, Val Acc = 0.7929
Epoch 14: Train Loss = 0.4874, Val Loss = 0.6682, Val Acc = 0.7884
Epoch 15: Train Loss = 0.4841, Val Loss = 0.6556, Val Acc = 0.7920
Epoc

# Non-dynamic LMFCC features

In [4]:
def _load_npz_array(path, key):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as
    soon as the array has been read, instead of lingering until garbage
    collection.
    """
    with np.load(path) as archive:
        return archive[key]


# Feature matrices of the "lmfcc ... nd" experiment, one per data split.
mfcc_train_x = _load_npz_array("lmfcc_train_x_nd.npz", "X")
mfcc_val_x   = _load_npz_array("lmfcc_val_x_nd.npz", "X")
mfcc_test_x  = _load_npz_array("lmfcc_test_x_nd.npz", "X")

# torch.as_tensor reuses the NumPy buffer when the dtype already matches,
# whereas torch.tensor always allocates a second full copy of the data.
train_x = torch.as_tensor(mfcc_train_x, dtype=torch.float32)
val_x   = torch.as_tensor(mfcc_val_x,   dtype=torch.float32)
test_x  = torch.as_tensor(mfcc_test_x,  dtype=torch.float32)

# Class labels for the same three splits, converted to int64 tensors.
train_y = torch.as_tensor(_load_npz_array("train_y_nd.npz", "y"), dtype=torch.long)
val_y   = torch.as_tensor(_load_npz_array("val_y_nd.npz", "y"),   dtype=torch.long)
test_y  = torch.as_tensor(_load_npz_array("test_y_nd.npz", "y"),  dtype=torch.long)

# Hyperparameters for this run.
input_dim = mfcc_train_x.shape[1]   # number of columns in the training matrix
hidden_dim = 256
# Class-index targets must be smaller than the number of network outputs, so
# size the output layer from the largest label rather than from the count of
# distinct labels (the two agree when the labels are exactly 0..K-1).
output_dim = int(train_y.max().item()) + 1
batch_size = 100
num_epochs = 20
learning_rate = 1e-3

class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input vector. Defaults to the notebook-level
            `input_dim` when omitted.
        hidden_features: width of both hidden layers. Defaults to the
            notebook-level `hidden_dim` when omitted.
        out_features: number of output scores. Defaults to the notebook-level
            `output_dim` when omitted.

    Calling `Net()` without arguments behaves exactly as before; the optional
    arguments let the same class be built for other feature sets without
    redefining it.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()
        # Globals are only looked up for the sizes the caller did not supply.
        if in_features is None:
            in_features = input_dim
        if hidden_features is None:
            hidden_features = hidden_dim
        if out_features is None:
            out_features = output_dim
        # Attribute names are kept so existing state_dict checkpoints still load.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw (unnormalised) output scores for the batch `x`."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model, loss and optimiser for this experiment.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(TensorDataset(val_x, val_y),     batch_size=batch_size)

checkpoint_path = "model_lmfcc_nd_2layer.pt"
best_val_loss = float('inf')

writer = SummaryWriter()
try:
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion yields a batch mean; weighting by batch size makes
            # the epoch value a per-sample mean even if the last batch is short.
            train_loss += loss.item() * inputs.size(0)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass (no gradients) ----
        net.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_loss /= len(val_loader.dataset)
        accuracy = correct / total

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {accuracy:.4f}")
        writer.add_scalars("Loss", {"Train": train_loss, "Val": val_loss}, epoch)
        writer.add_scalar("Accuracy/Val", accuracy, epoch)

        # Keep the weights of the epoch with the lowest validation loss, as
        # the mspec experiments in this notebook do, instead of whatever the
        # final epoch produced. All `num_epochs` epochs are still run.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(net.state_dict(), checkpoint_path)
finally:
    # Flush pending TensorBoard events and release the event file, even if
    # training is interrupted.
    writer.close()

if best_val_loss != float('inf'):
    # Bring the in-memory model in line with the checkpoint on disk.
    net.load_state_dict(torch.load(checkpoint_path))
else:
    # Validation loss never improved on +inf (e.g. it was NaN throughout):
    # fall back to saving the final weights, as the notebook did before.
    torch.save(net.state_dict(), checkpoint_path)


Epoch 1: Train Loss = 1.2462, Val Loss = 1.2434, Val Acc = 0.6075
Epoch 2: Train Loss = 1.1453, Val Loss = 1.2232, Val Acc = 0.6136
Epoch 3: Train Loss = 1.1168, Val Loss = 1.2200, Val Acc = 0.6154
Epoch 4: Train Loss = 1.0999, Val Loss = 1.2212, Val Acc = 0.6182
Epoch 5: Train Loss = 1.0875, Val Loss = 1.2338, Val Acc = 0.6137
Epoch 6: Train Loss = 1.0781, Val Loss = 1.2302, Val Acc = 0.6149
Epoch 7: Train Loss = 1.0709, Val Loss = 1.2222, Val Acc = 0.6159
Epoch 8: Train Loss = 1.0649, Val Loss = 1.2359, Val Acc = 0.6156
Epoch 9: Train Loss = 1.0599, Val Loss = 1.2403, Val Acc = 0.6138
Epoch 10: Train Loss = 1.0556, Val Loss = 1.2400, Val Acc = 0.6132
Epoch 11: Train Loss = 1.0519, Val Loss = 1.2445, Val Acc = 0.6143
Epoch 12: Train Loss = 1.0486, Val Loss = 1.2391, Val Acc = 0.6148
Epoch 13: Train Loss = 1.0456, Val Loss = 1.2598, Val Acc = 0.6113
Epoch 14: Train Loss = 1.0428, Val Loss = 1.2530, Val Acc = 0.6104
Epoch 15: Train Loss = 1.0404, Val Loss = 1.2657, Val Acc = 0.6110
Epoc

# Non-dynamic MSPEC features

In [5]:
def _load_npz_array(path, key):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as
    soon as the array has been read, instead of lingering until garbage
    collection.
    """
    with np.load(path) as archive:
        return archive[key]


# Feature matrices of the "mspec ... nd" experiment, one per data split.
mspec_train_x = _load_npz_array("mspec_train_x_nd.npz", "X")
mspec_val_x   = _load_npz_array("mspec_val_x_nd.npz", "X")
mspec_test_x  = _load_npz_array("mspec_test_x_nd.npz", "X")

# torch.as_tensor reuses the NumPy buffer when the dtype already matches,
# whereas torch.tensor always allocates a second full copy of the data.
train_x = torch.as_tensor(mspec_train_x, dtype=torch.float32)
val_x   = torch.as_tensor(mspec_val_x, dtype=torch.float32)
test_x  = torch.as_tensor(mspec_test_x, dtype=torch.float32)

# Class labels for the same three splits, converted to int64 tensors.
train_y = torch.as_tensor(_load_npz_array("train_y_nd.npz", "y"), dtype=torch.long)
val_y   = torch.as_tensor(_load_npz_array("val_y_nd.npz", "y"), dtype=torch.long)
test_y  = torch.as_tensor(_load_npz_array("test_y_nd.npz", "y"), dtype=torch.long)

# Hyperparameters for this run.
input_dim = mspec_train_x.shape[1]   # number of columns in the training matrix
hidden_dim = 256
# Class-index targets must be smaller than the number of network outputs, so
# size the output layer from the largest label rather than from the count of
# distinct labels (the two agree when the labels are exactly 0..K-1).
output_dim = int(train_y.max().item()) + 1
batch_size = 100
num_epochs = 20
learning_rate = 1e-3
patience = 3   # epochs without validation-loss improvement before stopping

class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input vector. Defaults to the notebook-level
            `input_dim` when omitted.
        hidden_features: width of both hidden layers. Defaults to the
            notebook-level `hidden_dim` when omitted.
        out_features: number of output scores. Defaults to the notebook-level
            `output_dim` when omitted.

    Calling `Net()` without arguments behaves exactly as before; the optional
    arguments let the same class be built for other feature sets without
    redefining it.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()
        # Globals are only looked up for the sizes the caller did not supply.
        if in_features is None:
            in_features = input_dim
        if hidden_features is None:
            hidden_features = hidden_dim
        if out_features is None:
            out_features = output_dim
        # Attribute names are kept so existing state_dict checkpoints still load.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw (unnormalised) output scores for the batch `x`."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model, loss and optimiser for this experiment.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(TensorDataset(val_x, val_y),     batch_size=batch_size)

writer = SummaryWriter()

checkpoint_path = "model_mspec_nd_2layer.pt"
best_val_loss = float('inf')
patience_counter = 0   # consecutive epochs without a new best validation loss

try:
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion yields a batch mean; weighting by batch size makes
            # the epoch value a per-sample mean even if the last batch is short.
            train_loss += loss.item() * inputs.size(0)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass (no gradients) ----
        net.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_loss /= len(val_loader.dataset)
        accuracy = correct / total

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {accuracy:.4f}")
        writer.add_scalars("Loss", {"Train": train_loss, "Val": val_loss}, epoch)
        writer.add_scalar("Accuracy/Val", accuracy, epoch)

        # Early stopping: checkpoint every new best validation loss and give
        # up after `patience` consecutive epochs without one.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(net.state_dict(), checkpoint_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
finally:
    # Flush pending TensorBoard events and release the event file, even if
    # training stops early or is interrupted.
    writer.close()

# When training ends, `net` holds the weights of the last epoch run, which is
# later than the best one whenever the patience counter was non-zero. Reload
# the checkpoint so the in-memory model matches the file on disk.
if best_val_loss != float('inf'):
    net.load_state_dict(torch.load(checkpoint_path))


Epoch 1: Train Loss = 1.2692, Val Loss = 1.2167, Val Acc = 0.6185
Epoch 2: Train Loss = 1.1310, Val Loss = 1.1718, Val Acc = 0.6320
Epoch 3: Train Loss = 1.0908, Val Loss = 1.1653, Val Acc = 0.6326
Epoch 4: Train Loss = 1.0659, Val Loss = 1.1884, Val Acc = 0.6318
Epoch 5: Train Loss = 1.0489, Val Loss = 1.1822, Val Acc = 0.6309
Epoch 6: Train Loss = 1.0367, Val Loss = 1.1873, Val Acc = 0.6313
Early stopping triggered at epoch 6


# dynamic mspec

In [7]:
def _load_npz_array(path, key):
    """Return the array stored under `key` in the .npz archive at `path`.

    The archive is opened in a `with` block so its file handle is closed as
    soon as the array has been read, instead of lingering until garbage
    collection.
    """
    with np.load(path) as archive:
        return archive[key]


# A recorded run of this cell stopped with a MemoryError while allocating a
# 1.41 GiB float32 array. Before loading, drop the names that this cell and
# the training code right after it rebind anyway, so that buffers left over
# from earlier experiments are no longer referenced while the new arrays are
# read. `pop(..., None)` keeps this safe on a fresh kernel where the names do
# not exist yet.
for _stale_name in ("train_loader", "val_loader", "train_x", "val_x",
                    "mspec_train_x", "mspec_val_x"):
    globals().pop(_stale_name, None)

# Feature matrices of the "mspec ... dy" experiment (train and validation only).
mspec_train_x = _load_npz_array("mspec_train_x_dy.npz", "X")
mspec_val_x   = _load_npz_array("mspec_val_x_dy.npz", "X")

# torch.as_tensor reuses the NumPy buffer when the dtype already matches,
# whereas torch.tensor always allocates a second full copy of the data.
train_x = torch.as_tensor(mspec_train_x, dtype=torch.float32)
val_x   = torch.as_tensor(mspec_val_x, dtype=torch.float32)

# Class labels for the two splits, converted to int64 tensors.
train_y = torch.as_tensor(_load_npz_array("train_y_dy.npz", "y"), dtype=torch.long)
val_y   = torch.as_tensor(_load_npz_array("val_y_dy.npz", "y"), dtype=torch.long)

# Hyperparameters for this run.
input_dim = mspec_train_x.shape[1]   # number of columns in the training matrix
hidden_dim = 256
# Class-index targets must be smaller than the number of network outputs, so
# size the output layer from the largest label rather than from the count of
# distinct labels (the two agree when the labels are exactly 0..K-1).
output_dim = int(train_y.max().item()) + 1
batch_size = 256
num_epochs = 20
learning_rate = 1e-3
patience = 5   # epochs without validation-loss improvement before stopping

class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of each input vector. Defaults to the notebook-level
            `input_dim` when omitted.
        hidden_features: width of both hidden layers. Defaults to the
            notebook-level `hidden_dim` when omitted.
        out_features: number of output scores. Defaults to the notebook-level
            `output_dim` when omitted.

    Calling `Net()` without arguments behaves exactly as before; the optional
    arguments let the same class be built for other feature sets without
    redefining it.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()
        # Globals are only looked up for the sizes the caller did not supply.
        if in_features is None:
            in_features = input_dim
        if hidden_features is None:
            hidden_features = hidden_dim
        if out_features is None:
            out_features = output_dim
        # Attribute names are kept so existing state_dict checkpoints still load.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw (unnormalised) output scores for the batch `x`."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Model, loss and optimiser for this experiment.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(TensorDataset(val_x, val_y),     batch_size=batch_size)

writer = SummaryWriter()

checkpoint_path = "model_mspec_dy_2layer.pt"
best_val_loss = float('inf')
patience_counter = 0   # consecutive epochs without a new best validation loss

try:
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion yields a batch mean; weighting by batch size makes
            # the epoch value a per-sample mean even if the last batch is short.
            train_loss += loss.item() * inputs.size(0)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass (no gradients) ----
        net.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_loss /= len(val_loader.dataset)
        accuracy = correct / total

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {accuracy:.4f}")
        writer.add_scalars("Loss", {"Train": train_loss, "Val": val_loss}, epoch)
        writer.add_scalar("Accuracy/Val", accuracy, epoch)

        # Early stopping: checkpoint every new best validation loss and give
        # up after `patience` consecutive epochs without one.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(net.state_dict(), checkpoint_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
finally:
    # Flush pending TensorBoard events and release the event file, even if
    # training stops early or is interrupted.
    writer.close()

# When training ends, `net` holds the weights of the last epoch run, which is
# later than the best one whenever the patience counter was non-zero. Reload
# the checkpoint so the in-memory model matches the file on disk.
if best_val_loss != float('inf'):
    net.load_state_dict(torch.load(checkpoint_path))


MemoryError: Unable to allocate 1.41 GiB for an array with shape (379634360,) and data type float32

# Evaluation (dynamic MSPEC model only)