In [1]:
import torch
from sklearn.model_selection import train_test_split
from models.ResNet import ResNet18
import os
import librosa
import numpy as np
from torchinfo import summary
from torch import nn
from torch.optim import Adam
from Utils import create_dataloader, k_fold_cross_validation
from torcheval.metrics import MulticlassAUROC, MulticlassF1Score

# limit GPU usage

In [2]:
torch.cuda.set_per_process_memory_fraction(0.625)

# Read Data 

In [3]:
root = '../Data/genres_original'
genres = os.listdir(root)
x = []
y = []
length = []
sr = 16*1000
for genre in genres:
    genre_root = os.path.join(root, genre)
    audios = os.listdir(genre_root)
    for audio in audios:
        audio_path = os.path.join(genre_root, audio)
        signal, sr = librosa.load(audio_path, sr=sr)
        x.append(signal)
        length.append(len(signal))
        y.append(genres.index(genre))
min_length = min(length)
print("finsh reading data")

finsh reading data


# Conversion and Compression

In [4]:
top_db = 80
for i in range(len(x)):
    signal = x[i][:min_length]
    mel_spect = librosa.feature.melspectrogram(y=signal,sr=sr,n_fft=1024) # convert signals to mel spectrogram
    mel_spect = librosa.power_to_db(mel_spect, ref=np.max, top_db=top_db) # log compression
    x[i] = mel_spect/-top_db # Normalisation
print("finish conversion and compression")

finish conversion and compression


# Split Data

In [5]:
x = np.asarray(x)
x = x.transpose((0,2,1))
x = x.reshape(x.shape[0],1,x.shape[1],x.shape[2])
y = np.asarray(y)
print(x.shape)

(1000, 1, 936, 128)


In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    stratify=y,shuffle=True)
# k-fold cross validation
k = 5
xs_train, ys_train, xs_valid, ys_valid = k_fold_cross_validation(x_train,y_train,k)
print("finish splitting data")

finish splitting data


In [7]:
batch_size = 32
dataloaders_train = []
dataloaders_valid = []
for i in range(k):
    dataloaders_train.append(create_dataloader(xs_train[i], ys_train[i], batch_size=batch_size))
    dataloaders_valid.append(create_dataloader(xs_valid[i], ys_valid[i], batch_size=batch_size))
dataloader_test = create_dataloader(x_test, y_test, batch_size=batch_size)
print("finish creating dataloaders")

finish creating dataloaders


# Model Construction

In [8]:
model = ResNet18(10,pre_filter_size=3,in_channels=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
opt = Adam(model.parameters(), lr=0.01)
summary(model,[(16,1,x.shape[2],x.shape[3])])

Layer (type:depth-idx)                   Output Shape              Param #
ResNet18                                 [16, 10]                  --
├─Sequential: 1-1                        [16, 64, 235, 33]         --
│    └─Conv2d: 2-1                       [16, 64, 469, 65]         640
│    └─BatchNorm2d: 2-2                  [16, 64, 469, 65]         128
│    └─ReLU: 2-3                         [16, 64, 469, 65]         --
│    └─MaxPool2d: 2-4                    [16, 64, 235, 33]         --
├─ResidualBlock: 1-2                     [16, 64, 235, 33]         --
│    └─Conv2d: 2-5                       [16, 64, 235, 33]         36,928
│    └─BatchNorm2d: 2-6                  [16, 64, 235, 33]         128
│    └─ReLU: 2-7                         [16, 64, 235, 33]         --
│    └─Conv2d: 2-8                       [16, 64, 235, 33]         36,928
│    └─BatchNorm2d: 2-9                  [16, 64, 235, 33]         128
│    └─ReLU: 2-10                        [16, 64, 235, 33]         --
├─R

# Train

In [9]:
for i in range(30):
    print("-------epoch  {} -------".format(i + 1))
    epoch_loss = 0
    epoch_accuracy = 0
    for j in range(k):
        print(f'fold {j+1}:')
        loss_train = 0
        accuracy_train = 0
        train_size = 0
        for batch_idx, (data, target) in enumerate(dataloaders_train[j]):
            model.train()
            output = model(data)
            loss = loss_function(output, target)
            opt.zero_grad()
            loss.backward()
            opt.step()
            loss_train += loss.item()*len(data)
            accuracy = (output.argmax(1) == target).sum()
            accuracy_train += accuracy
            train_size += len(data)
        print("train set loss: {}".format(loss_train/train_size))
        print("train set accuracy: {}".format(accuracy_train /train_size))

        loss_valid = 0
        accuracy_valid = 0
        valid_size = 0
        for batch_idx, (data, target) in enumerate(dataloaders_valid[j]):
            model.eval()
            with torch.no_grad():
                output = model(data)
                loss = loss_function(output, target)
                loss_valid += loss.item()*len(data)
                accuracy = (output.argmax(1) == target).sum()
                accuracy_valid += accuracy
                valid_size += len(data)
        print("valid set loss: {}".format(loss_valid/valid_size))
        print("valid set accuracy: {}".format(accuracy_valid/valid_size))
        epoch_loss += loss_valid/valid_size
        epoch_accuracy += accuracy_valid/valid_size
    print("epoch loss: {}".format(epoch_loss/k))
    print("epoch accuracy: {}".format(epoch_accuracy/k))
print("finish training")

-------epoch  1 -------
fold 1:
train set loss: 2.1882028102874758
train set accuracy: 0.27031251788139343
valid set loss: 2.3648351192474366
valid set accuracy: 0.13125000894069672
fold 2:
train set loss: 2.054834043979645
train set accuracy: 0.44062501192092896
valid set loss: 2.400520086288452
valid set accuracy: 0.11250000447034836
fold 3:
train set loss: 2.0053793132305144
train set accuracy: 0.5
valid set loss: 2.1949092388153075
valid set accuracy: 0.20000000298023224
fold 4:
train set loss: 1.9704265654087068
train set accuracy: 0.515625
valid set loss: 2.0732168674468996
valid set accuracy: 0.2750000059604645
fold 5:
train set loss: 1.9317202866077423
train set accuracy: 0.578125
valid set loss: 2.2874654293060304
valid set accuracy: 0.15000000596046448
epoch loss: 2.264189348220825
epoch accuracy: 0.17374999821186066
-------epoch  2 -------
fold 1:
train set loss: 1.9296475768089294
train set accuracy: 0.543749988079071
valid set loss: 2.0326261043548586
valid set accuracy: 0

# Test

In [10]:
loss_test = 0
accuracy_test = 0
AUC_test = 0
f1_score_test = 0
test_size = 0
for batch_idx, (data, target) in enumerate(dataloader_test):
    model.eval()
    with torch.no_grad():
        output = model(data)
        loss = loss_function(output, target)
        loss_test += loss.item()*len(data)
        accuracy = (output.argmax(1) == target).sum()
        accuracy_test += accuracy
        test_size += len(data)
        auc = MulticlassAUROC(num_classes=10)
        auc.update(output, target)
        AUC_test += auc.compute()*len(data)
        f1 = MulticlassF1Score(num_classes=10)
        f1.update(output,target)
        f1_score_test += f1.compute()*len(data)        
print("test set loss: {}".format(loss_test/test_size))
print("test set accuracy: {}".format(accuracy_test/test_size))
print("test set AUC: {}".format(AUC_test/test_size))
print("test set f1-score: {}".format(f1_score_test/test_size))

test set loss: 1.8265378189086914
test set accuracy: 0.6449999809265137
test set AUC: 0.9106495380401611
test set f1-score: 0.6449999809265137
