In [1]:
import math
import json

import keras
import librosa
import os

import numpy
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn


# Human-readable genre names, listed alphabetically.
# NOTE(review): presumably indexed by the integer labels that preprocess()
# assigns to the genre directories - confirm the directory order matches.
class_names = [
    "Blues",
    "Classical",
    "Country",
    "Disco",
    "Hiphop",
    "Jazz",
    "Metal",
    "Pop",
    "Reggae",
    "Rock",
]

# Show which GPUs TensorFlow can see.
print(tf.config.list_physical_devices(device_type='GPU'))

def preprocess(dataset_path, num_mfcc=40, n_fft=2048, hop_length=512, num_segment=10):
    """Extract per-segment audio features for every ``.wav`` file under ``dataset_path``.

    Each track is loaded at 22050 Hz, treated as 30 seconds long and cut into
    ``num_segment`` equal slices. For every full-length slice the MFCCs, the
    dB-scaled STFT magnitude, the dB-scaled mel spectrogram and the chroma
    features are computed.

    Args:
        dataset_path: Root directory; every directory below it is walked and
            its ``.wav`` files are processed in sorted order.
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size used for all four features.
        hop_length: Hop between successive frames, in samples.
        num_segment: Number of slices each track is cut into.

    Returns:
        Tuple ``(all_mfcc, all_spec, all_mel, all_chroma, labels)``. The four
        feature arrays are float32 with shapes ``(count, num_mfcc, frames)``,
        ``(count, 1 + n_fft // 2, frames)``, ``(count, 128, frames)`` and
        ``(count, 12, frames)``. ``labels`` has shape ``(count,)`` and holds
        the zero-based position of the segment's directory in the (sorted)
        walk order, stored as floats (NumPy's default dtype).
    """
    sample_rate = 22050
    track_duration = 30  # seconds of audio assumed per track
    sample_per_segment = int(sample_rate * track_duration / num_segment)
    # librosa centres its frames by default, so a slice of n samples yields
    # 1 + n // hop_length frames (130 for the default arguments).
    expected_frames = 1 + sample_per_segment // hop_length

    # First pass: list the tracks so the buffers are sized from the data
    # rather than from a hard-coded file count.
    genre_dirs = []
    for label_idx, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place makes os.walk descend in a deterministic order,
        # so labels do not depend on the file system's listing order.
        dirnames.sort()
        if dirpath == dataset_path:
            continue
        wav_files = [f for f in sorted(filenames) if f.endswith('.wav')]
        genre_dirs.append((label_idx - 1, dirpath, wav_files))

    capacity = num_segment * sum(len(wav_files) for _, _, wav_files in genre_dirs)
    all_mfcc = np.empty([capacity, num_mfcc, expected_frames], dtype=np.float32)
    all_spec = np.empty([capacity, 1 + n_fft // 2, expected_frames], dtype=np.float32)
    all_mel = np.empty([capacity, 128, expected_frames], dtype=np.float32)  # librosa default n_mels
    all_chroma = np.empty([capacity, 12, expected_frames], dtype=np.float32)  # librosa default n_chroma
    labels = np.empty([capacity])

    # Second pass: load each track and fill the buffers.
    count = 0
    for label, dirpath, wav_files in genre_dirs:
        print(dirpath)
        for f in wav_files:
            file_path = os.path.join(dirpath, f)

            try:
                y, sr = librosa.load(path=file_path, sr=sample_rate)
            except Exception as exc:
                # Skip unreadable files, but say which one and why. Unlike a
                # bare except, this lets KeyboardInterrupt stop the run.
                print("Exception", file_path, exc)
                continue

            for n in range(num_segment):
                segment = y[sample_per_segment * n: sample_per_segment * (n + 1)]
                if segment.size == 0:
                    # The track ended early; later slices are empty as well.
                    break

                # Get MFCC
                mfcc = librosa.feature.mfcc(y=segment,
                                            sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft,
                                            hop_length=hop_length, dtype=np.float32)

                # Avoid smaller or corrupt segments
                if mfcc.shape[1] != expected_frames:
                    continue
                all_mfcc[count] = mfcc
                labels[count] = label

                # Get spectrogram (magnitude in dB)
                spec = librosa.stft(y=segment, n_fft=n_fft, hop_length=hop_length)
                all_spec[count] = librosa.amplitude_to_db(np.abs(spec))

                # Get mel-spectrogram (power in dB)
                mel = librosa.feature.melspectrogram(y=segment, sr=sample_rate, n_fft=n_fft,
                                                     hop_length=hop_length, dtype=np.float32)
                all_mel[count] = librosa.power_to_db(mel)

                # Get chroma-features
                all_chroma[count] = librosa.feature.chroma_stft(y=segment, sr=sample_rate, n_fft=n_fft,
                                                                hop_length=hop_length, dtype=np.float32)
                count += 1

    print(count)
    # Trim the unused tail with slices; np.delete would copy every buffer.
    return (all_mfcc[:count], all_spec[:count], all_mel[:count],
            all_chroma[:count], labels[:count])

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
def prepare_datasets(test_size, validation_size,
                     dataset_path=r"C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original",
                     random_state=None):
    """Extract the features and split them into train, validation and test sets.

    Args:
        test_size: Passed to ``train_test_split`` as the share of all
            segments held out for testing.
        validation_size: Passed to ``train_test_split`` as the share of the
            remaining (non-test) segments held out for validation.
        dataset_path: Root directory handed to ``preprocess``. Defaults to
            the location that was previously hard-coded in this function.
        random_state: Seed forwarded to both ``train_test_split`` calls.
            ``None`` (the default) keeps the splits unseeded as before.

    Returns:
        A 15-tuple ordered as train, validation, test for each of MFCC,
        spectrogram, mel spectrogram and chroma, followed by ``y_train``,
        ``y_validation`` and ``y_test``. Every feature array has a trailing
        axis of length 1 appended; the label arrays are returned unchanged.
    """
    mfcc, spec, mel_spec, chroma, y = preprocess(dataset_path)

    # Hold out the test set first.
    (mfcc_train, mfcc_test,
     spec_train, spec_test,
     mel_spec_train, mel_spec_test,
     chroma_train, chroma_test,
     y_train, y_test) = train_test_split(
        mfcc, spec, mel_spec, chroma, y,
        test_size=test_size, random_state=random_state)

    # The unsplit arrays are no longer needed; drop the references so their
    # memory can be reclaimed before the second split makes further copies.
    del mfcc, spec, mel_spec, chroma

    # Carve the validation set out of what is left.
    (mfcc_train, mfcc_validation,
     spec_train, spec_validation,
     mel_spec_train, mel_spec_validation,
     chroma_train, chroma_validation,
     y_train, y_validation) = train_test_split(
        mfcc_train, spec_train, mel_spec_train, chroma_train, y_train,
        test_size=validation_size, random_state=random_state)

    features = (
        mfcc_train, mfcc_validation, mfcc_test,
        spec_train, spec_validation, spec_test,
        mel_spec_train, mel_spec_validation, mel_spec_test,
        chroma_train, chroma_validation, chroma_test,
    )
    # Append a trailing axis of length 1 to every feature array.
    with_channel = tuple(arr[..., np.newaxis] for arr in features)

    return with_channel + (y_train, y_validation, y_test)


# Build the splits: 20% of all segments go to the test set, then 25% of the
# remainder (another 20% of the total) goes to the validation set.
(
    mfcc_train, mfcc_validation, mfcc_test,
    spec_train, spec_validation, spec_test,
    mel_spec_train, mel_spec_validation, mel_spec_test,
    chroma_train, chroma_validation, chroma_test,
    y_train, y_validation, y_test,
) = prepare_datasets(test_size=0.2, validation_size=0.25)

# Report the train / validation / test shapes of every feature and the labels.
print('MFCC', *(split.shape for split in (mfcc_train, mfcc_validation, mfcc_test)))
print('Spectrogram', *(split.shape for split in (spec_train, spec_validation, spec_test)))
print('Mel-Spectrogram', *(split.shape for split in (mel_spec_train, mel_spec_validation, mel_spec_test)))
print('Chroma', *(split.shape for split in (chroma_train, chroma_validation, chroma_test)))
print('Labels', *(split.shape for split in (y_train, y_validation, y_test)))

C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\blues
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\classical
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\country
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\disco
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\hiphop
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\jazz


  return f(*args, **kwargs)


Exception
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\metal
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\pop
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\reggae
C:\Users\dpetr\Desktop\sxoli\music genre classification\MFCC-CNN\Data\genres_original\rock
9986


KeyboardInterrupt: 