In [None]:
import gc
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [None]:
# Directory that holds the cached .pt tensors, expressed relative to the
# current working directory.
DATASET_DIR = os.path.relpath("data/")
# Create the directory if needed; exist_ok=True makes this a no-op when it is already there.
os.makedirs(DATASET_DIR, exist_ok=True)

In [None]:
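# One-off conversion step, left commented out: re-saves the arrays stored in the
# .npz files as .pt tensors inside DATASET_DIR so that load_data() can torch.load them.
# def save_as_tensor(data, filename):
#     """
#     Save a NumPy array as a torch tensor in DATASET_DIR; skip it if the file already exists.
#     """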
#     print(f"Saving {filename}...")
#     if os.path.exists(os.path.join(DATASET_DIR, filename)):
#         print(f"{filename} already exists. Skipping.")
#         return
#     data = torch.from_numpy(data)
#     torch.save(data, os.path.join(DATASET_DIR, filename))
    

# lmfcc = np.load("lmfcc.npz", allow_pickle=True)
# save_as_tensor(lmfcc["lmfcc_train_x"], "lmfcc_train_x.pt")
# save_as_tensor(lmfcc["lmfcc_val_x"], "lmfcc_val_x.pt")
# save_as_tensor(lmfcc["lmfcc_test_x"], "lmfcc_test_x.pt")
# np.savez(os.path.join(DATASET_DIR, "lmfcc_scaler.npz"), lmfcc_scaler=lmfcc["lmfcc_scaler"])
# del lmfcc # to free memory
# gc.collect()

# mspec = np.load("mspec.npz", allow_pickle=True)
# save_as_tensor(mspec["mspec_train_x"], "mspec_train_x.pt")
# save_as_tensor(mspec["mspec_val_x"], "mspec_val_x.pt")
# save_as_tensor(mspec["mspec_test_x"], "mspec_test_x.pt")
# np.savez(os.path.join(DATASET_DIR, "mspec_scaler.npz"), mspec_scaler=mspec["mspec_scaler"])
# del mspec
# gc.collect()

# targets = np.load("targets.npz", allow_pickle=True)
# save_as_tensor(targets["train_y"], "train_y.pt")
# save_as_tensor(targets["val_y"], "val_y.pt")
# save_as_tensor(targets["test_y"], "test_y.pt")
# del targets
# gc.collect()

Saving lmfcc_train_x.pt...
lmfcc_train_x.pt already exists. Skipping.
Saving lmfcc_val_x.pt...
lmfcc_val_x.pt already exists. Skipping.
Saving lmfcc_test_x.pt...
lmfcc_test_x.pt already exists. Skipping.
Saving mspec_train_x.pt...
Saving mspec_val_x.pt...
Saving mspec_test_x.pt...
Saving train_y.pt...
Saving val_y.pt...
Saving test_y.pt...


In [None]:
def load_data(feature_type="lmfcc"):
    """
    Read the cached feature and target tensors from DATASET_DIR.

    Args:
        feature_type: prefix of the feature files to read; the files
            "<feature_type>_{train,val,test}_x.pt" are loaded.

    Returns:
        A 6-tuple (train_x, val_x, test_x, train_y, val_y, test_y), where the
        targets come from "train_y.pt", "val_y.pt" and "test_y.pt".
    """
    print("Loading data...")
    splits = ("train", "val", "test")

    def _read(filename):
        # Every file lives directly inside DATASET_DIR.
        return torch.load(os.path.join(DATASET_DIR, filename))

    # Features first, then targets, each in train/val/test order.
    features = [_read(f"{feature_type}_{split}_x.pt") for split in splits]
    targets = [_read(f"{split}_y.pt") for split in splits]

    return (*features, *targets)

# Load the "lmfcc" feature tensors and the targets for the train/val/test splits.
X_train, X_val, X_test, y_train, y_val, y_test = load_data("lmfcc")

Loading data...


In [None]:
class PhonemeClassifier(nn.Module):
    """
    Fully connected classifier with two hidden ReLU layers (512 and 256 units)
    and a linear output layer of size `output_dim`.

    forward() returns log-probabilities: log-softmax taken over dim 1.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, output_dim)

    def forward(self, x):
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        # NOTE(review): nn.CrossEntropyLoss applies log-softmax itself. Because
        # log-softmax is idempotent, normalising here does not change that loss;
        # it only means callers receive log-probabilities instead of raw logits.
        return F.log_softmax(logits, dim=1)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Layer sizes are taken from the data: number of feature columns in, number of
# target columns out. The model is placed on `device` as soon as it is built.
model = PhonemeClassifier(input_dim=X_train.shape[1], output_dim=y_train.shape[1]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Training cell: fits `model` on the training split, reports the mean per-batch
# train/validation loss after every epoch (stdout + TensorBoard), and saves the
# final weights to 'trained-model.pt' once all epochs have completed.
# `tqdm` is imported in the notebook's import cell.

batch_size = 256
num_epochs = 100

# Move the complete train/validation tensors to the device once. The loaders
# below then yield batches that already live on `device`, so no per-batch
# transfer is needed inside the loops.
X_train = X_train.to(device)
X_val = X_val.to(device)
y_train = y_train.to(device)
y_val = y_val.to(device)

# create the data loaders for training and validation sets
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# setup logging so that you can follow training using TensorBoard (see https://pytorch.org/docs/stable/tensorboard.html)
writer = SummaryWriter()

# train the network
try:
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in tqdm(train_loader):
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = model(inputs)
            # the targets are cast to float before being handed to the criterion
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()
            # accumulate the training loss
            train_loss += loss.item()

        # calculate the validation loss (no gradient tracking, eval mode)
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels.float())
                val_loss += loss.item()

        # average of the per-batch losses for this epoch
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)

        print(f'Epoch {epoch}: train_loss={train_loss}, val_loss={val_loss}')
        writer.add_scalars('loss',{'train':train_loss,'val':val_loss}, epoch)
finally:
    # Runs on normal completion AND when training is interrupted (e.g. the
    # KeyboardInterrupt raised by stopping the cell): close() flushes pending
    # events and releases the event files, so epochs logged so far are kept.
    writer.close()

# save the trained network (only reached when every epoch has finished)
torch.save(model.state_dict(), 'trained-model.pt')

100%|██████████| 5277/5277 [00:26<00:00, 202.65it/s]


Epoch 0: train_loss=0.549325388592031, val_loss=0.7376735039529365


 25%|██▌       | 1321/5277 [00:06<00:19, 200.08it/s]


KeyboardInterrupt: 