In [1]:
from google.colab import drive
drive.mount('/content/drive')

!cp '/content/drive/MyDrive/music-classifier/spectrograms.zip' .
!unzip -q spectrograms.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
replace spectrograms/disco/disco_92_7.pt? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [2]:
import os
import pickle
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'device: {device}')

device: cuda


## dataloader

In [3]:
# dataset
class GTZAN_Dataset(Dataset):
    """Dataset of saved spectrogram tensors indexed by a CSV file.

    The CSV is read with a ``file`` column (str) and a ``category`` column
    (int). Items are looked up by position: the first column is the tensor
    file path relative to ``dir``, the second column is the label.

    Args:
        dataset: path of the CSV index file.
        dir: directory that the paths in the CSV are joined onto.
        transform: on/off flag. When truthy, each loaded tensor is passed
            through ``split_complex_numbers``. NOTE: a callable passed here
            is only tested for truthiness; it is never invoked.
        target_transform: optional callable applied to the label.
    """

    def __init__(self, dataset, dir, transform=None, target_transform=None):
        self.labels = pd.read_csv(dataset, dtype={'file':str, 'category':int})
        self.dir = dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.labels)

    def split_complex_numbers(self, x):
        """Split the complex tensor ``x[0]`` into stacked real/imag planes.

        Args:
            x: tensor whose first entry ``x[0]`` is complex-valued.

        Returns:
            Tensor of shape ``(2, *x[0].shape)`` in the default dtype:
            index 0 holds the real part, index 1 the imaginary part.
        """
        plane = x[0]
        # Size the buffer from the input instead of assuming 256x256, so other
        # spectrogram sizes work and a mismatched shape is never silently
        # broadcast into a fixed-size buffer.
        y = torch.empty((2, *plane.shape))
        y[0], y[1] = plane.real, plane.imag
        return y

    def __getitem__(self, idx):
        """Load the ``idx``-th tensor from disk and return ``(spec, label)``."""
        filepath = os.path.join(self.dir, self.labels.iloc[idx,0])
        spec = torch.load(filepath)
        label = self.labels.iloc[idx,1]

        # `transform` acts purely as a switch for the real/imag split.
        if self.transform:
            spec = self.split_complex_numbers(spec)
        if self.target_transform:
            label = self.target_transform(label)

        return spec, label


# data loader
root_path = ''
gtzan_trn = GTZAN_Dataset('dataset_files_train.csv', root_path, transform=True)
gtzan_tst  = GTZAN_Dataset('dataset_files_test.csv', root_path, transform=True)
trn_dataloader = DataLoader(gtzan_trn, batch_size=128, shuffle=True)
# Evaluation data is not shuffled: the test loss is averaged per batch, so with a
# smaller final batch a random batch composition would make it vary between runs.
tst_dataloader = DataLoader(gtzan_tst, batch_size=128, shuffle=False)

## model definition

In [4]:
# model definition
class CNN(nn.Module):
    """Five conv stages followed by a three-layer classifier head (10 outputs).

    Each stage is Conv2d(k=4, p=2) -> Tanh -> MaxPool2d(2) -> BatchNorm2d.
    The conv grows each spatial side by one and the pooling halves it
    (rounding down), so a 256x256 input shrinks 128 -> 64 -> 32 -> 16 -> 8,
    which is the 8*8*256 feature size the first linear layer expects.
    """

    def __init__(self):
        super().__init__()

        # Channel widths at the boundaries of the five conv stages.
        widths = (2, 16, 32, 64, 128, 256)

        stages = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stages.extend([
                nn.Conv2d(n_in, n_out, kernel_size=4, padding=2),
                nn.Tanh(),
                nn.MaxPool2d(2),
                nn.BatchNorm2d(n_out),
            ])
        stages.append(nn.Flatten())
        self.cnn_layers = nn.Sequential(*stages)

        hidden = 2048
        self.linear_layers = nn.Sequential(
            nn.Linear(8*8*256, hidden),
            nn.Tanh(),
            nn.Linear(hidden, hidden),
            nn.Tanh(),
            nn.Linear(hidden, 10),
        )

    def forward(self, x):
        """Map a batch of 2-channel inputs to a (batch, 10) tensor of class scores."""
        features = self.cnn_layers(x)
        # Already flattened by nn.Flatten above; the reshape is kept as a no-op.
        features = features.view(features.size(0), -1)
        return self.linear_layers(features)

## training

In [5]:
# Step size used by the optimiser below.
learning_rate = 1e-4

# Build the network on the selected device, then the loss and a plain SGD optimiser.
model = CNN().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: iterable yielding ``(X, y)`` tensor batches.
        model: the network to train; left in training mode on return.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: optimiser holding the model's parameters.

    Returns:
        The mean of the per-batch losses (also printed).
    """
    # Mode-dependent layers (BatchNorm, Dropout) must be in training mode here,
    # even if an earlier evaluation step switched the model to eval mode.
    model.train()

    # Send batches to wherever the model's weights live; a model without
    # parameters falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    running_loss = []
    for X, y in dataloader:
        X, y = X.to(target), y.to(target)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())
    training_loss = np.mean(running_loss)
    print(f'training loss: {round(training_loss, 5)}')
    return training_loss


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f'test accuracy: {round(correct, 5)}, test loss: {round(test_loss, 5)}')
    return correct, test_loss

In [7]:
# Per-epoch history: the training loss, and the (accuracy, loss) pair on the test set.
stat_train = []
stat_test = []

for t in range(100):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(trn_dataloader, model, loss_fn, optimizer)
    stat_train.append(epoch_train_loss)
    epoch_test_stats = test_loop(tst_dataloader, model, loss_fn)
    stat_test.append(epoch_test_stats)
print('\ndone.')

Epoch 1
-------------------------------
training loss: 2.23507
test accuracy: 0.24462, test loss: 2.17781
Epoch 2
-------------------------------
training loss: 2.13102
test accuracy: 0.27014, test loss: 2.09757
Epoch 3
-------------------------------
training loss: 2.06102
test accuracy: 0.29665, test loss: 2.04222
Epoch 4
-------------------------------
training loss: 2.00864
test accuracy: 0.30265, test loss: 1.99987
Epoch 5
-------------------------------
training loss: 1.96956
test accuracy: 0.30915, test loss: 1.96431
Epoch 6
-------------------------------
training loss: 1.93586
test accuracy: 0.31616, test loss: 1.93724
Epoch 7
-------------------------------
training loss: 1.90912
test accuracy: 0.32416, test loss: 1.91407
Epoch 8
-------------------------------
training loss: 1.88666
test accuracy: 0.33467, test loss: 1.89248
Epoch 9
-------------------------------
training loss: 1.86369
test accuracy: 0.33767, test loss: 1.87265
Epoch 10
-------------------------------
train

## save model

In [8]:
# Persist the per-epoch statistics and the trained weights to Google Drive.
# Absolute paths match the mount point used when loading the data, so saving
# does not depend on the notebook's current working directory.
with open('/content/drive/MyDrive/music-classifier/training_statistics.pkl', 'wb') as file:
    pickle.dump([stat_train, stat_test], file)

torch.save(model.state_dict(), '/content/drive/MyDrive/music-classifier/nn_classifier_statedict.pt')