In [3]:
import gc
import keras
import librosa
import os


import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn


# Human-readable genre names (ten entries, listed alphabetically).
class_names = [
    "Blues", "Classical", "Country", "Disco", "Hiphop",
    "Jazz", "Metal", "Pop", "Reggae", "Rock",
]
# Report the GPU devices TensorFlow can see.
print(tf.config.list_physical_devices('GPU'))



[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
def preprocess(dataset_path, num_mfcc=20, n_fft=2048, hop_length=512, num_segment=10):
    """Convert a folder of per-class ``.wav`` tracks into mel-spectrogram segments.

    ``dataset_path`` is walked with ``os.walk`` (sub-directories visited in
    alphabetical order). Every ``.wav`` file found below the root is loaded at
    22050 Hz and cut into ``num_segment`` equal slices of a nominal 30-second
    track; each slice becomes one dB-scaled mel-spectrogram.

    Args:
        dataset_path: Root directory; each sub-directory holds one class.
        num_mfcc: Unused. Kept so existing calls keep working.
        n_fft: FFT window size forwarded to ``librosa.feature.melspectrogram``.
        hop_length: Hop length forwarded to ``librosa.feature.melspectrogram``.
        num_segment: Number of slices each track is divided into.

    Returns:
        Tuple ``(all_mel, labels)``:
        ``all_mel`` is a float32 array of shape ``(count, 128, frames)`` where
        ``frames = 1 + samples_per_segment // hop_length`` (130 with the
        defaults); ``labels`` is an int32 array of shape ``(count,)`` holding
        the zero-based position of the track's directory in the walk order.
        Tracks that fail to load and slices whose spectrogram does not have
        the expected shape (e.g. the tail of a short track) are skipped.
    """
    sample_rate = 22050
    track_seconds = 30
    n_mels = 128  # librosa's default band count; n_mels is not overridden below
    sample_per_segment = int(sample_rate * track_seconds / num_segment)
    # Frame count produced by librosa's default centred framing for a full slice.
    frames_per_segment = 1 + sample_per_segment // hop_length

    # Pass 1: discover the tracks so the buffers can be sized from the data
    # instead of assuming exactly 1000 files.
    tracks = []
    for label_idx, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place makes os.walk descend alphabetically, so the label
        # assigned to each directory is deterministic across platforms.
        dirnames.sort()

        if dirpath == dataset_path:
            continue

        tracks.extend(
            (label_idx - 1, dirpath, fname)
            for fname in sorted(filenames)
            if fname.endswith('.wav')
        )

    capacity = len(tracks) * num_segment
    all_mel = np.empty([capacity, n_mels, frames_per_segment], dtype=np.float32)
    labels = np.empty([capacity], dtype=np.int32)

    # Pass 2: load each track and fill the buffers.
    count = 0
    for label, dirpath, fname in tracks:
        file_path = os.path.basename(dirpath) + '/' + fname
        print("Track Name", file_path)

        try:
            # os.path.join keeps this working on non-Windows systems too.
            y, sr = librosa.load(path=os.path.join(dirpath, fname), sr=sample_rate)
        except Exception as exc:
            # Best effort: report why the track was skipped and carry on.
            print("Exception", file_path, repr(exc))
            continue

        for n in range(num_segment):
            segment = y[sample_per_segment * n: sample_per_segment * (n + 1)]
            if len(segment) == 0:
                # Track is shorter than this slice's start; nothing to analyse.
                continue
            # Get mel-spectrogram
            mel = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=n_fft,
                                                 hop_length=hop_length, dtype=np.float32)
            mel_db = librosa.power_to_db(mel)
            # Only keep slices with the full expected shape.
            if mel_db.shape == all_mel.shape[1:]:
                all_mel[count] = mel_db
                labels[count] = label
                count += 1

    print(count)
    # Drop the unused tail left by skipped tracks/slices.
    return all_mel[:count], labels[:count]

In [5]:
# Folder containing one sub-directory of .wav tracks per genre.
# Set the GENRES_DATASET_DIR environment variable to point somewhere else
# without editing the notebook; the original local path remains the default.
DATASET_PATH = os.environ.get(
    "GENRES_DATASET_DIR",
    r"C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original",
)
mel, labels = preprocess(DATASET_PATH)

print(mel.shape, mel.dtype, labels.shape, labels.dtype)

Track Name blues/blues.00000.wav
Track Name blues/blues.00001.wav
Track Name blues/blues.00002.wav
Track Name blues/blues.00003.wav
Track Name blues/blues.00004.wav
Track Name blues/blues.00005.wav
Track Name blues/blues.00006.wav
Track Name blues/blues.00007.wav
Track Name blues/blues.00008.wav
Track Name blues/blues.00009.wav
Track Name blues/blues.00010.wav
Track Name blues/blues.00011.wav
Track Name blues/blues.00012.wav
Track Name blues/blues.00013.wav
Track Name blues/blues.00014.wav
Track Name blues/blues.00015.wav
Track Name blues/blues.00016.wav
Track Name blues/blues.00017.wav
Track Name blues/blues.00018.wav
Track Name blues/blues.00019.wav
Track Name blues/blues.00020.wav
Track Name blues/blues.00021.wav
Track Name blues/blues.00022.wav
Track Name blues/blues.00023.wav
Track Name blues/blues.00024.wav
Track Name blues/blues.00025.wav
Track Name blues/blues.00026.wav
Track Name blues/blues.00027.wav
Track Name blues/blues.00028.wav
Track Name blues/blues.00029.wav
Track Name

  return f(*args, **kwargs)


Track Name jazz/jazz.00056.wav
Track Name jazz/jazz.00057.wav
Track Name jazz/jazz.00058.wav
Track Name jazz/jazz.00059.wav
Track Name jazz/jazz.00060.wav
Track Name jazz/jazz.00061.wav
Track Name jazz/jazz.00062.wav
Track Name jazz/jazz.00063.wav
Track Name jazz/jazz.00064.wav
Track Name jazz/jazz.00065.wav
Track Name jazz/jazz.00066.wav
Track Name jazz/jazz.00067.wav
Track Name jazz/jazz.00068.wav
Track Name jazz/jazz.00069.wav
Track Name jazz/jazz.00070.wav
Track Name jazz/jazz.00071.wav
Track Name jazz/jazz.00072.wav
Track Name jazz/jazz.00073.wav
Track Name jazz/jazz.00074.wav
Track Name jazz/jazz.00075.wav
Track Name jazz/jazz.00076.wav
Track Name jazz/jazz.00077.wav
Track Name jazz/jazz.00078.wav
Track Name jazz/jazz.00079.wav
Track Name jazz/jazz.00080.wav
Track Name jazz/jazz.00081.wav
Track Name jazz/jazz.00082.wav
Track Name jazz/jazz.00083.wav
Track Name jazz/jazz.00084.wav
Track Name jazz/jazz.00085.wav
Track Name jazz/jazz.00086.wav
Track Name jazz/jazz.00087.wav
Track Na

In [6]:
def prepare_datasets(test_size, validation_size, random_state=None):
    """Split the notebook-level ``mel`` / ``labels`` arrays into three stratified sets.

    The test set is carved out first; the validation set is then taken from
    what remains. Both splits are stratified on the labels.

    Args:
        test_size: Forwarded to ``train_test_split`` for the first split.
        validation_size: Forwarded to ``train_test_split`` for the second
            split (applied to the data left after removing the test set).
        random_state: Optional seed forwarded to both splits so the partition
            can be reproduced. ``None`` (the default) keeps the previous
            unseeded behaviour.

    Returns:
        ``(x_train, x_test, x_validation, y_train, y_test, y_validation)``.
    """
    x_remaining, x_test, y_remaining, y_test = train_test_split(
        mel, labels,
        test_size=test_size, stratify=labels, random_state=random_state,
    )
    x_train, x_validation, y_train, y_validation = train_test_split(
        x_remaining, y_remaining,
        test_size=validation_size, stratify=y_remaining, random_state=random_state,
    )
    return x_train, x_test, x_validation, y_train, y_test, y_validation


# Hold out 1500 samples for testing and a further 1500 for validation.
(mel_train, mel_test, mel_validation,
 y_train, y_test, y_validation) = prepare_datasets(test_size=1500, validation_size=1500)
print(*(part.shape for part in (mel_train, y_train,
                                mel_validation, y_validation,
                                mel_test, y_test)))

(6986, 128, 130) (6986,) (1500, 128, 130) (1500,) (1500, 128, 130) (1500,)


In [None]:
# Alternative to recomputing the spectrograms: read previously saved splits
# from an .npz archive. Running this cell replaces the train/validation/test
# arrays assigned by the earlier cells.
# The context manager closes the archive once the arrays have been read.
with np.load(r'C:\Users\dpetr\Desktop\sxoli\Level 3\Individual\datasets\GTZAN\mel_spectogram_train_validation_test.npz') as mel_file:
    mel_train = mel_file['mel_spec_train']
    mel_validation = mel_file['mel_spec_validation']
    mel_test = mel_file['mel_spec_test']

    y_train = mel_file['y_train']
    y_validation = mel_file['y_validation']
    y_test = mel_file['y_test']

In [7]:
# Sanity check: print feature and label shapes for the train / validation / test splits.
print("X shape: ", *(features.shape for features in (mel_train, mel_validation, mel_test)))
print("Y shape: ", *(targets.shape for targets in (y_train, y_validation, y_test)))

X shape:  (6986, 128, 130) (1500, 128, 130) (1500, 128, 130)
Y shape:  (6986,) (1500,) (1500,)


In [8]:
# Standardise all three splits with the mean / standard deviation of the
# training set only, so the scaling uses no validation or test statistics.
# NOTE: the splits are overwritten, so running this cell a second time
# rescales data that is already normalised.
mean, std = np.mean(mel_train), np.std(mel_train)
mel_train = (mel_train - mean) / std
mel_validation = (mel_validation - mean) / std
mel_test = (mel_test - mean) / std

In [None]:
def build_mode(input_shape, num_classes=10):
    """Build the fully connected classifier used in this notebook.

    The input is flattened and passed through L2-regularised ReLU layers of
    256, 64, 64 and 32 units (dropout 0.2, 0.2 and 0.1 after the first three),
    followed by a softmax output layer.

    Args:
        input_shape: Shape of a single input sample (without the batch axis).
        num_classes: Number of output classes; defaults to 10.

    Returns:
        The uncompiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()

    model.add(keras.layers.Flatten(input_shape=input_shape))

    # (units, dropout rate) for each regularised hidden layer followed by dropout.
    for units, dropout_rate in ((256, 0.2), (64, 0.2), (64, 0.1)):
        model.add(keras.layers.Dense(units, activation="relu",
                                     kernel_regularizer=keras.regularizers.l2(0.001)))
        model.add(keras.layers.Dropout(dropout_rate))

    model.add(keras.layers.Dense(32, activation="relu",
                                 kernel_regularizer=keras.regularizers.l2(0.001)))

    # Output layer: one probability per class.
    model.add(keras.layers.Dense(num_classes, activation="softmax"))

    return model

In [None]:
# Fully connected baseline on the flattened mel-spectrograms.
# Shape of one sample (everything after the batch axis) and the number of
# distinct labels are taken from the training split.
input_shape = mel_train.shape[1:]
num_classes = len(np.unique(y_train))

model = keras.Sequential([
    keras.layers.Flatten(input_shape=input_shape),
    keras.layers.Dense(256, activation="relu",
                       kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation="relu",
                       kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation="relu",
                       kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(32, activation="relu",
                       kernel_regularizer=keras.regularizers.l2(0.001)),
    # One softmax probability per class.
    keras.layers.Dense(num_classes, activation="softmax")
])

optimizer = keras.optimizers.Adam(learning_rate=0.0002)

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

# Train on the training split and monitor the dedicated validation split;
# the test split stays untouched so it can serve as a final, unbiased check.
history = model.fit(mel_train, y_train,
          validation_data=(mel_validation, y_validation),
          epochs=100,
          batch_size=32
          )

# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy curves.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'r', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Loss curves, drawn on a separate figure.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'r', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation Loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()
