In [64]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import numpy as np
import pandas as pd
from random import shuffle
import librosa
import os
from tqdm import tqdm

In [65]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [66]:
input_dim = 500
num_classes = 4
alpha = 0.0001
num_epochs = 200
batch_size = 512
hidden_layer = 128
classification_weight = 0.1

In [67]:
def get_one_hot(label_num, num_classes=4):
    one_hot = np.zeros((1, num_classes))
    one_hot[0, int(label_num)] = 1
    return one_hot

In [68]:
def load_data():
    print('Reading data...')
    songs = []
    onehotlabels = []

    all_genres = ['Classical', 'Jazz', 'Pop']

    numsplit = 20
    sizesplit = input_dim

    for index in range(len(all_genres)):
        for foldername, subfolders, filenames in tqdm(os.walk('../wav-Data/' + all_genres[index])):
            for filename in filenames:
                if filename.endswith(".wav"):
                    try:
                        audio, _ = librosa.load(os.path.join(foldername, filename))
                    except Exception as e:
                        print(f'Error encountered: {e}')
                        continue

                    audio = audio[:600000]
                    if len(audio) % 40 != 0:
                        audio = np.pad(audio, (0, 40 - len(audio) % 40), 'constant', constant_values=np.mean(audio[-(40 - len(audio) % 40):]))
                    audio = audio.reshape(-1, 40)
                    audio = np.mean(audio, axis=1)

                    for j in range(numsplit):
                        start_index = sizesplit * j
                        if start_index >= len(audio):
                            break
                        end_index = sizesplit * (j + 1)
                        if end_index >= len(audio):
                            end_index = len(audio)-1
                        songs.append(audio[start_index:end_index])
                        onehotlabels.append(get_one_hot(index)[0])

    songs = pd.DataFrame(songs)
    onehotlabels = pd.DataFrame(onehotlabels)
    print('Data reading done :)')
    return songs, onehotlabels


In [69]:
songs, labels = load_data()

# Shuffling training set
ind_list = [i for i in range(songs.shape[0])]
shuffle(ind_list)
songs = songs.iloc[ind_list]
labels = labels.iloc[ind_list]

songs_train = songs.iloc[0:6000].values
songs_dev = songs.iloc[6000:].values
labels_train = labels.iloc[0:6000].values
labels_dev = labels.iloc[6000:].values

train_dataset = data.TensorDataset(torch.from_numpy(songs_train).float(), torch.from_numpy(labels_train).long())
dev_dataset = data.TensorDataset(torch.from_numpy(songs_dev).float(), torch.from_numpy(labels_dev).long())

train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = data.DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)

Reading data...


5it [00:34,  6.91s/it]
2it [00:16,  8.43s/it]
2it [00:22, 11.31s/it]


Data reading done :)


## Softmax

In [70]:
class SoftmaxClassifier(nn.Module):
    def __init__(self, input_dim, num_classes, hidden=128):
        super(SoftmaxClassifier, self).__init__()

        self.classifier = nn.Sequential(
            nn.Linear(input_dim, hidden),
            nn.Tanh(),
            nn.Linear(hidden, num_classes)
        )

    def forward(self, x):
        return self.classifier(x)

In [71]:
def train_softmax(model, train_loader, dev_loader, criterion, optimizer, num_epochs):
    model.train()
    
    train_accuracies = []
    dev_accuracies = []
    loss_per_epoch = []

    for epoch in range(num_epochs):
        total_loss = 0
        correct_train = 0

        for inputs_batch, labels_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs_batch)
            loss = criterion(outputs, labels_batch.float())
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            _, labels_batch = torch.max(labels_batch.data, 1)
            correct_train += (predicted == labels_batch).sum().item()

        accuracy_train = correct_train / len(train_loader.dataset)
        print(f"Epoch {epoch+1}, Train Accuracy: {accuracy_train}")

        model.eval()
        correct_dev = 0

        with torch.no_grad():
            for inputs_batch, labels_batch in dev_loader:
                outputs = model(inputs_batch)
                _, predicted = torch.max(outputs.data, 1)
                _, labels_batch = torch.max(labels_batch.data, 1)
                correct_dev += (predicted == labels_batch).sum().item()

        accuracy_dev = correct_dev / len(dev_loader.dataset)
        print(f"Epoch {epoch+1}, Dev Accuracy: {accuracy_dev}")

        train_accuracies.append(accuracy_train)
        dev_accuracies.append(accuracy_dev)
        loss_per_epoch.append(total_loss / len(train_loader))

    return train_accuracies, dev_accuracies, loss_per_epoch

In [72]:
model = SoftmaxClassifier(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=alpha)

train_accuracies, dev_accuracies, loss_per_epoch = train_softmax(model, train_loader, dev_loader, criterion, optimizer, num_epochs)

print("Train Accuracy:", train_accuracies[-1])
print("Val Accuracy:", dev_accuracies[-1])

Epoch 1, Train Accuracy: 0.20166666666666666
Epoch 1, Dev Accuracy: 0.21911357340720222
Epoch 2, Train Accuracy: 0.23166666666666666
Epoch 2, Dev Accuracy: 0.24875346260387812
Epoch 3, Train Accuracy: 0.2795
Epoch 3, Dev Accuracy: 0.30498614958448755
Epoch 4, Train Accuracy: 0.3625
Epoch 4, Dev Accuracy: 0.39002770083102495
Epoch 5, Train Accuracy: 0.4325
Epoch 5, Dev Accuracy: 0.40969529085872575
Epoch 6, Train Accuracy: 0.4675
Epoch 6, Dev Accuracy: 0.47257617728531853
Epoch 7, Train Accuracy: 0.517
Epoch 7, Dev Accuracy: 0.5058171745152354
Epoch 8, Train Accuracy: 0.5401666666666667
Epoch 8, Dev Accuracy: 0.528393351800554
Epoch 9, Train Accuracy: 0.5583333333333333
Epoch 9, Dev Accuracy: 0.5412742382271468
Epoch 10, Train Accuracy: 0.569
Epoch 10, Dev Accuracy: 0.5476454293628809
Epoch 11, Train Accuracy: 0.5746666666666667
Epoch 11, Dev Accuracy: 0.5527700831024931
Epoch 12, Train Accuracy: 0.5796666666666667
Epoch 12, Dev Accuracy: 0.5558171745152355
Epoch 13, Train Accuracy: 0.5

## Deep Softmax Autoencoder

In [73]:
class AutoencoderClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(AutoencoderClassifier, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.Sigmoid(),
            nn.Linear(256, 192),
            nn.Tanh(),
            nn.Linear(192, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.Sigmoid(),
            nn.Linear(128, 192),
            nn.Sigmoid(),
            nn.Linear(192, 256),
            nn.ReLU(),
            nn.Linear(256, input_dim),
            nn.ReLU()
        )

        self.classifier = nn.Sequential(
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 16),
            nn.Tanh(),
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        encoding = self.encoder(x)
        decoding = self.decoder(encoding)
        classification = self.classifier(encoding)
        return encoding, decoding, classification

In [76]:
def train_autoencoder(model, train_loader, dev_loader, criterion, optimizer, num_epochs=200):
    model.train()

    train_accuracies = []
    dev_accuracies = []
    loss_per_epoch = []

    for epoch in range(num_epochs):
        cost_list = []
        correct_train = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            encoding, decoding, preds = model(inputs)
            reconstruction_loss = criterion(decoding, inputs)
            classification_loss = classification_weight * criterion(preds, labels.float())  # Convert labels to float
            loss = reconstruction_loss + classification_loss
            loss.backward()
            optimizer.step()

            _, predictions = torch.max(preds, 1)
            _, labels = torch.max(labels, 1)
            correct_train += torch.sum(predictions == labels).item()

            cost_list.append(loss.item())

        accuracy_train = correct_train / float(len(train_loader.dataset))
        print(f"Epoch {epoch + 1}, Train Accuracy: {accuracy_train}")
        loss_per_epoch = float(sum(cost_list)) / len(cost_list)
        print(f"Epoch {epoch + 1}, Train Loss: {loss_per_epoch}")

        model.eval()
        correct_dev = 0
        with torch.no_grad():
            for inputs, labels in dev_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                _, _, preds = model(inputs)

                _, predictions = torch.max(preds, 1)
                _, labels = torch.max(labels, 1)
                correct_dev += torch.sum(predictions == labels).item()

        accuracy_dev = correct_dev / float(len(dev_loader.dataset))
        print(f"Test Accuracy: {accuracy_dev}")

        train_accuracies.append(accuracy_train)
        dev_accuracies.append(accuracy_dev)
    
    return train_accuracies, dev_accuracies, loss_per_epoch

In [78]:
model = AutoencoderClassifier(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=alpha)

train_accuracies, dev_accuracies, loss_per_epoch = train_autoencoder(model, train_loader, dev_loader, criterion, optimizer, num_epochs)

print("Train Accuracy:", train_accuracies[-1])
print("Val Accuracy:", dev_accuracies[-1])

Epoch 1, Train Accuracy: 0.5455
Epoch 1, Train Loss: 0.13333338809510073
Test Accuracy: 0.5915512465373961
Epoch 2, Train Accuracy: 0.5981666666666666
Epoch 2, Train Loss: 0.12473893538117409
Test Accuracy: 0.5915512465373961
Epoch 3, Train Accuracy: 0.5981666666666666
Epoch 3, Train Loss: 0.1167018786072731
Test Accuracy: 0.5915512465373961
Epoch 4, Train Accuracy: 0.5981666666666666
Epoch 4, Train Loss: 0.10676331507662933
Test Accuracy: 0.5915512465373961
Epoch 5, Train Accuracy: 0.5981666666666666
Epoch 5, Train Loss: 0.09464194563527902
Test Accuracy: 0.5915512465373961
Epoch 6, Train Accuracy: 0.5981666666666666
Epoch 6, Train Loss: 0.0835690548022588
Test Accuracy: 0.5915512465373961
Epoch 7, Train Accuracy: 0.5981666666666666
Epoch 7, Train Loss: 0.07096674044926961
Test Accuracy: 0.5915512465373961
Epoch 8, Train Accuracy: 0.5981666666666666
Epoch 8, Train Loss: 0.058926244266331196
Test Accuracy: 0.5915512465373961
Epoch 9, Train Accuracy: 0.5981666666666666
Epoch 9, Train Lo