In [3]:
# please upload data directory to Google drive
# use Google Drive to load data
from google.colab import drive, files
import zipfile
drive.mount('/content/drive')

zipname = 'supports.zip'
uploaded = files.upload()
# Extract the zip file
with zipfile.ZipFile(zipname, 'r') as zip_ref:
  zip_ref.extractall()  # Extract all files to the current directory

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Saving supports.zip to supports.zip


In [5]:
!pip install -r "requirements.txt"
!pip install torchinfo
!pip install torcheval
import torch
from sklearn.model_selection import train_test_split
from models.DCNN import DCNN
import os
import librosa
import numpy as np
from torchinfo import summary
from torch import nn
from torch.optim import Adam
from Utils import create_dataloader, k_fold_cross_validation
from torcheval.metrics import MulticlassAUROC, MulticlassF1Score

Collecting anyio==4.3.0 (from -r requirements.txt (line 1))
  Using cached anyio-4.3.0-py3-none-any.whl (85 kB)
Collecting arrow==1.3.0 (from -r requirements.txt (line 4))
  Using cached arrow-1.3.0-py3-none-any.whl (66 kB)
Collecting asttokens==2.4.1 (from -r requirements.txt (line 5))
  Using cached asttokens-2.4.1-py2.py3-none-any.whl (27 kB)
Collecting async-lru==2.0.4 (from -r requirements.txt (line 6))
  Using cached async_lru-2.0.4-py3-none-any.whl (6.1 kB)
Collecting colorama==0.4.6 (from -r requirements.txt (line 15))
  Using cached colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting comm==0.2.2 (from -r requirements.txt (line 16))
  Using cached comm-0.2.2-py3-none-any.whl (7.2 kB)
Collecting debugpy==1.8.1 (from -r requirements.txt (line 19))
  Using cached debugpy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
Collecting decorator==5.1.1 (from -r requirements.txt (line 20))
  Using cached decorator-5.1.1-py3-none-any.whl (9.1 kB)
Collecting exe

# limit GPU usage (Can be ignored)

In [6]:
# torch.cuda.set_per_process_memory_fraction(0.8)

TypeError: Invalid type for fraction argument, must be `float`

# Read Data

In [7]:
root = '/content/drive/MyDrive/Data/genres_original' # Change according path storing data
genres = os.listdir(root)
x = []
y = []
length = []
sr = 16*1000
for genre in genres:
    genre_root = os.path.join(root, genre)
    audios = os.listdir(genre_root)
    for audio in audios:
        audio_path = os.path.join(genre_root, audio)
        signal, sr = librosa.load(audio_path, sr=sr)
        x.append(signal)
        length.append(len(signal))
        y.append(genres.index(genre))
min_length = min(length)
print("finsh reading data")

finsh reading data


# Segment and Normalise

In [8]:
for i in range(len(x)):
    x[i] = x[i][0:min_length]
    x[i] = librosa.util.normalize(x[i])
x = np.asarray(x)
y = np.asarray(y)
# print(x.shape,y.shape)
seg_length = 59049
frame_num = int(x.shape[1]/seg_length)
preprocessed_x = x[:, :frame_num*seg_length].reshape(frame_num*x.shape[0],1,seg_length)
preprocessed_y = (y.reshape(y.shape[0],1)*np.ones((y.shape[0],frame_num))).reshape(y.shape[0]*frame_num)
# print(preprocessed_x.shape,preprocessed_y.shape)
print("finish segmentation and normalisation")

finish segmentation and normalisation


# Split Data

In [9]:
x_train, x_test, y_train, y_test = train_test_split(preprocessed_x, preprocessed_y, test_size=0.2,
                                                    stratify=preprocessed_y,shuffle=True)
# k-fold cross validation
k = 5
xs_train, ys_train, xs_valid, ys_valid = k_fold_cross_validation(x_train,y_train,k)
print("finish splitting data")

finish splitting data


# Create Dataloaders

In [15]:
batch_size = 64 # can be adjusted according to GPU memory size
dataloaders_train = []
dataloaders_valid = []
for i in range(k):
    dataloaders_train.append(create_dataloader(xs_train[i], ys_train[i], batch_size=batch_size))
    dataloaders_valid.append(create_dataloader(xs_valid[i], ys_valid[i], batch_size=batch_size))
dataloader_test = create_dataloader(x_test, y_test, batch_size=batch_size)
print("finish creating dataloaders")

finish creating dataloaders


# Model Construction

In [16]:
model = DCNN(10)
model.cuda()
loss_function = nn.CrossEntropyLoss()
opt = Adam(model.parameters(), lr=0.01)
summary(model,[(64,1,seg_length)])

Layer (type:depth-idx)                   Output Shape              Param #
DCNN                                     [64, 10]                  --
├─ConLayer: 1-1                          [64, 128, 19683]          --
│    └─Conv1d: 2-1                       [64, 128, 19683]          512
│    └─BatchNorm1d: 2-2                  [64, 128, 19683]          256
│    └─ReLU: 2-3                         [64, 128, 19683]          --
├─Sequential: 1-2                        [64, 128, 2187]           --
│    └─ConLayer: 2-4                     [64, 128, 6561]           --
│    │    └─Conv1d: 3-1                  [64, 128, 19683]          49,280
│    │    └─BatchNorm1d: 3-2             [64, 128, 19683]          256
│    │    └─ReLU: 3-3                    [64, 128, 19683]          --
│    │    └─MaxPool1d: 3-4               [64, 128, 6561]           --
│    └─ConLayer: 2-5                     [64, 128, 2187]           --
│    │    └─Conv1d: 3-5                  [64, 128, 6561]           49,280
│   

# Train

In [17]:
for i in range(15):
    print("-------epoch  {} -------".format(i + 1))
    for j in range(k):
        print(f'fold {j+1}:')
        loss_train = 0
        accuracy_train = 0
        train_size = 0
        for batch_idx, (data, target) in enumerate(dataloaders_train[j]):
            model.train()
            output = model(data)
            loss = loss_function(output, target)
            opt.zero_grad()
            loss.backward()
            opt.step()
            loss_train += loss.item()*len(data)
            accuracy = (output.argmax(1) == target).sum()
            accuracy_train += accuracy
            train_size += len(data)
        print("train set loss: {}".format(loss_train/train_size))
        print("train set accuracy: {}".format(accuracy_train /train_size))

        loss_valid = 0
        accuracy_valid = 0
        valid_size = 0
        for batch_idx, (data, target) in enumerate(dataloaders_valid[j]):
            model.eval()
            with torch.no_grad():
                output = model(data)
                loss = loss_function(output, target)
                loss_valid += loss.item()*len(data)
                accuracy = (output.argmax(1) == target).sum()
                accuracy_valid += accuracy
                valid_size += len(data)
        print("valid set loss: {}".format(loss_valid/valid_size))
        print("valid set accuracy: {}".format(accuracy_valid/valid_size))
print("finish training")

-------epoch  1 -------
fold 1:
train set loss: 2.0863609210191796
train set accuracy: 0.373775839805603
valid set loss: 2.067073633673401
valid set accuracy: 0.3373231589794159
fold 2:
train set loss: 1.9408030193440933
train set accuracy: 0.5171381831169128
valid set loss: 1.8685509359486345
valid set accuracy: 0.6028291583061218
fold 3:
train set loss: 1.8600755171106482
train set accuracy: 0.5851469039916992
valid set loss: 1.8864874109479886
valid set accuracy: 0.47660499811172485
fold 4:
train set loss: 1.7979087117447299
train set accuracy: 0.6441784501075745
valid set loss: 1.7994746755853182
valid set accuracy: 0.5560391545295715
fold 5:
train set loss: 1.7630908216823042
train set accuracy: 0.6528835296630859
valid set loss: 1.8177338426079401
valid set accuracy: 0.5723612308502197
-------epoch  2 -------
fold 1:
train set loss: 1.7297430650713135
train set accuracy: 0.6964091062545776
valid set loss: 1.8003314409214473
valid set accuracy: 0.5799782276153564
fold 2:
train set

# Test

In [18]:
loss_test = 0
accuracy_test = 0
AUC_test = 0
f1_score_test = 0
test_size = 0
for batch_idx, (data, target) in enumerate(dataloader_test):
    model.eval()
    with torch.no_grad():
        output = model(data)
        loss = loss_function(output, target)
        loss_test += loss.item()*len(data)
        accuracy = (output.argmax(1) == target).sum()
        accuracy_test += accuracy
        test_size += len(data)
        auc = MulticlassAUROC(num_classes=10)
        auc.update(output, target)
        AUC_test += auc.compute()*len(data)
        f1 = MulticlassF1Score(num_classes=10)
        f1.update(output,target)
        f1_score_test += f1.compute()*len(data)
print("test set loss: {}".format(loss_test/test_size))
print("test set accuracy: {}".format(accuracy_test/test_size))
print("test set AUC: {}".format(AUC_test/test_size))
print("test set f1-score: {}".format(f1_score_test/test_size))

test set loss: 1.5538323197393857
test set accuracy: 0.8920800685882568
test set AUC: 0.8870842456817627
test set f1-score: 0.8920800685882568
