In [None]:
########################################
#  ONLY RUN THIS CELL ON GOOGLE COLAB  #
########################################

from google.colab import drive
drive.mount('/content/gdrive')
os.chdir("/content/gdrive/My Drive/5100project")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Import Libraries
import os
import librosa
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split



In [5]:
import random
from torch.utils.data import ConcatDataset


class MelSpectrogramDataset(Dataset):
    def __init__(self, data_dir, augment=False):
        self.data_dir = data_dir
        self.all_mel_spectrograms = []
        self.all_labels = []
        self.label_to_id = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
        self.augment = augment
        self.load_data()

    def load_data(self):
        for root, dirs, files in os.walk(self.data_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # Load audio file
                signal, sr = librosa.load(file_path, sr=22050)

                if self.augment:
                    # Random Crop
                    crop_length = random.randint(0, int(0.1*sr))
                    signal = signal[crop_length:]

                    # Add random noise
                    noise = 0.05*np.random.randn(len(signal))
                    signal += noise

                # Convert to Mel Spectrogram
                mel_spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_fft=2048, hop_length=512, n_mels=128)

                # Fix length
                mel_spectrogram = librosa.util.fix_length(mel_spectrogram, size=1292)

                # Add feature
                self.all_mel_spectrograms.append(mel_spectrogram)

                # Add label
                label = os.path.basename(root)
                self.all_labels.append(self.label_to_id[label])
        # verify if all the mel spectrograms have the same shape
        # print(set([ms.shape for ms in self.all_mel_spectrograms]))
        assert len(set([ms.shape for ms in self.all_mel_spectrograms])) == 1

    def __len__(self):
        return len(self.all_labels)

    def __getitem__(self, index):
        mel_spectrogram = self.all_mel_spectrograms[index]
        label = self.all_labels[index]
        return mel_spectrogram, label


# The path to dataset
data_dir = './genres'

# create dataset with data augmentation
augmented_dataset1 = MelSpectrogramDataset(data_dir, augment=True)
augmented_dataset2 = MelSpectrogramDataset(data_dir, augment=True)


# concatenate original dataset and augmented dataset
original_dataset = MelSpectrogramDataset(data_dir)
combined_dataset = ConcatDataset([original_dataset, augmented_dataset1, augmented_dataset2])

# Split combined dataset into training set and test set
train_dataset, test_dataset = train_test_split(combined_dataset, test_size=0.4, random_state=42)

# Create dataloader
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


In [6]:
class MelSpectrogramClassifier(nn.Module):
    def __init__(self):
        super(MelSpectrogramClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.max_pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.max_pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.flatten = nn.Flatten()
        self.dense1 = nn.Linear(661504, 128)
        self.dropout = nn.Dropout(0.5)
        self.dense2 = nn.Linear(128, 10)

    def forward(self, x):
        # print(x.shape)
        x = self.conv1(x)
        # print(x.shape)
        x = nn.functional.relu(x)
        x = self.max_pool1(x)
        # print(x.shape)
        x = self.conv2(x)
        # print(x.shape)
        x = nn.functional.relu(x)
        x = self.max_pool2(x)
        # print(x.shape)
        x = self.flatten(x)
        # print(x.shape)
        x = self.dense1(x)
        # print(x.shape)
        x = nn.functional.relu(x)
        x = self.dropout(x)
        # print(x.shape)
        x = self.dense2(x)
        # print(x.shape)
        return x

# Create model
model = MelSpectrogramClassifier()


In [7]:
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()



In [8]:
num_epochs = 40
for epoch in range(num_epochs):
    train_loss = 0.0
    train_accuracy = 0.0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Convert to tensor
        data, target = torch.tensor(data), torch.tensor(target)

        # Pass data to model
        optimizer.zero_grad()
        output = model(data.unsqueeze(1).float())

        # Calculate loss
        loss = criterion(output, target)

        # backward and optimize
        loss.backward()
        optimizer.step()

        # Calculate test accuracy
        predicted = output.argmax(1)
        train_accuracy += (predicted == target).sum().item()
        train_loss += loss.item()

    train_loss /= len(train_loader.dataset)
    train_accuracy /= len(train_loader.dataset)

    # Evaluate model
    test_loss = 0.0
    test_accuracy = 0.0
    model.eval()
    with torch.no_grad():
        for data, target in test_loader:
            # Convert data and label to tensor
            data, target = torch.tensor(data), torch.tensor(target)

            # Pass data to model
            output = model(data.unsqueeze(1).float())

            # Calculate loss
            loss = criterion(output, target)

            # Calculate accuracy
            predicted = output.argmax(1)
            test_accuracy += (predicted == target).sum().item()
            test_loss += loss.item()

    test_loss /= len(test_loader.dataset)
    test_accuracy /= len(test_loader.dataset)

    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.4f}, Test Loss: {:.4f}, Test Accuracy: {:.4f}'.format(epoch+1, num_epochs, train_loss, train_accuracy, test_loss, test_accuracy))


  data, target = torch.tensor(data), torch.tensor(target)
  data, target = torch.tensor(data), torch.tensor(target)


Epoch [1/15], Train Loss: 1.2964, Train Accuracy: 0.1639, Test Loss: 0.0706, Test Accuracy: 0.2083
Epoch [2/15], Train Loss: 0.0617, Train Accuracy: 0.3667, Test Loss: 0.0545, Test Accuracy: 0.4967
Epoch [3/15], Train Loss: 0.0397, Train Accuracy: 0.6706, Test Loss: 0.0411, Test Accuracy: 0.7033
Epoch [4/15], Train Loss: 0.0225, Train Accuracy: 0.8572, Test Loss: 0.0337, Test Accuracy: 0.7475
Epoch [5/15], Train Loss: 0.0151, Train Accuracy: 0.9117, Test Loss: 0.0338, Test Accuracy: 0.7725
Epoch [6/15], Train Loss: 0.0107, Train Accuracy: 0.9400, Test Loss: 0.0316, Test Accuracy: 0.7975
Epoch [7/15], Train Loss: 0.0080, Train Accuracy: 0.9494, Test Loss: 0.0342, Test Accuracy: 0.8000
Epoch [8/15], Train Loss: 0.0083, Train Accuracy: 0.9439, Test Loss: 0.0377, Test Accuracy: 0.7725
Epoch [9/15], Train Loss: 0.0070, Train Accuracy: 0.9561, Test Loss: 0.0350, Test Accuracy: 0.8008
Epoch [10/15], Train Loss: 0.0048, Train Accuracy: 0.9694, Test Loss: 0.0364, Test Accuracy: 0.8025


KeyboardInterrupt: 

In [13]:
# save model to target path
torch.save(model.state_dict(), './backend/recommender/classify_model.pth')

Yes
