In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import random
from tensorflow.keras import utils
%matplotlib inline

In [2]:
# Genre name -> integer class label; the label is the position in this list.
dict_genres = {
    name: label
    for label, name in enumerate(
        ['Blues', 'Classical', 'Country', 'Disco', 'Hip-Hop',
         'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock']
    )
}

In [3]:
def create_spectogram(audio_path):
    """Load an audio file and return its dB-scaled mel spectrogram, transposed.

    The file is loaded with librosa's default settings, converted to a mel
    power spectrogram, and expressed in dB relative to its own maximum
    (``ref=np.max``).

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray
        The transpose of the dB spectrogram.
        NOTE(review): presumably (frames, mel bands), matching the
        (1280, 128) slices built in ``create_array`` -- confirm.
    """
    waveform, sample_rate = librosa.load(audio_path)
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.T

In [4]:
def data_split(X_data, y_data, n_chunks=10):
    """Cut every sample into equal consecutive chunks and repeat its label.

    Sample ``i`` is cut along its first axis into ``n_chunks`` pieces, which
    end up at rows ``i * n_chunks .. i * n_chunks + n_chunks - 1`` of the
    result, in their original order. The resulting shapes are printed.

    Parameters
    ----------
    X_data : array-like, shape (n_samples, n_frames, ...)
        Samples to cut; ``n_frames`` must be divisible by ``n_chunks``.
    y_data : array-like, shape (n_samples, ...)
        One label per sample.
    n_chunks : int, default 10
        Number of chunks to cut each sample into.

    Returns
    -------
    X_data_split : numpy.ndarray, shape (n_samples * n_chunks, n_frames // n_chunks, ...)
        The chunks. With zero samples the trailing dimensions are kept, so
        the result can still be concatenated with non-empty results.
    y_data_split : numpy.ndarray
        ``y_data`` with every entry repeated ``n_chunks`` times along axis 0.

    Raises
    ------
    ValueError
        If ``n_chunks`` is not positive, if the samples cannot be divided
        evenly, or if non-empty samples have no axis to split.
    """
    if n_chunks <= 0:
        raise ValueError(f"n_chunks must be a positive integer, got {n_chunks}")

    X_data = np.asarray(X_data)
    if X_data.ndim >= 2:
        n_samples, n_frames = X_data.shape[:2]
        if n_frames % n_chunks != 0:
            raise ValueError(
                f"cannot split {n_frames} frames into {n_chunks} equal chunks"
            )
        new_shape = (n_samples * n_chunks, n_frames // n_chunks) + X_data.shape[2:]
        # A C-order reshape yields exactly the per-sample consecutive chunks;
        # copy so the result never aliases the caller's array.
        X_data_split = X_data.reshape(new_shape).copy()
    elif X_data.size == 0:
        # e.g. ``[]``: nothing to split and no per-sample shape to preserve.
        X_data_split = X_data.copy()
    else:
        raise ValueError("each sample must have at least one axis to split")

    y_data_split = np.repeat(y_data, n_chunks, axis=0)
    print(X_data_split.shape, y_data_split.shape)

    return X_data_split, y_data_split

In [5]:
def create_array(g, data_dir='data/genres_original/', n_frames=1280, n_mels=128):
    """Build the chunked spectrogram/label arrays for one genre directory.

    Every file in ``<data_dir>/<g>`` is turned into a spectrogram with
    ``create_spectogram``, truncated to ``n_frames`` rows, and labelled with
    ``dict_genres[g]``. Files that cannot be loaded, or whose spectrogram is
    too short or has the wrong width, are reported and skipped. The collected
    arrays are then passed through ``data_split``.

    Parameters
    ----------
    g : str
        Genre name; must be a key of ``dict_genres`` and a sub-directory of
        ``data_dir``.
    data_dir : str, default 'data/genres_original/'
        Directory that contains one sub-directory per genre.
    n_frames : int, default 1280
        Number of leading spectrogram rows kept per file.
    n_mels : int, default 128
        Expected number of spectrogram columns.

    Returns
    -------
    X_spect, y_arr : numpy.ndarray
        Whatever ``data_split`` returns for the collected spectrograms
        (float64) and their labels.

    Raises
    ------
    KeyError
        If ``g`` is not a key of ``dict_genres``.
    OSError
        If the genre directory cannot be listed.
    """
    print(g)
    label = dict_genres[g]
    genre_dir = os.path.join(data_dir, f'{g}')

    spects = []
    count = 0
    # Sorted so the sample order does not depend on the file system.
    for filename in sorted(os.listdir(genre_dir)):
        count += 1
        audio_path = os.path.join(genre_dir, f'{filename}')

        # Skip unreadable files, but say which one failed and why.
        # ``Exception`` (not a bare except) lets Ctrl-C still stop the run.
        try:
            spect = create_spectogram(audio_path)
        except Exception as err:
            print("Couldn't process: ", count, filename, repr(err))
            continue

        # Clips shorter than n_frames cannot be normalized by truncation.
        if spect.ndim != 2 or spect.shape[0] < n_frames or spect.shape[1] != n_mels:
            print("Couldn't process: ", count, filename,
                  f"unexpected spectrogram shape {spect.shape}")
            continue

        # Normalize for small shape differences
        spects.append(spect[:n_frames, :])
        if count % 100 == 0:
            print("Currently processing: ", count)

    # Fill one preallocated float64 array instead of re-copying the whole
    # array for every file; this also gives the right shape when empty.
    X_spect = np.empty((len(spects), n_frames, n_mels))
    for idx, spect in enumerate(spects):
        X_spect[idx] = spect
    y_arr = np.full(len(spects), label)

    X_spect, y_arr = data_split(X_spect, y_arr)

    return X_spect, y_arr

In [6]:
# One slot per genre, indexed by the genre's integer label.
X_data_genre = [[] for _ in range(10)]
y_data_genre = [[] for _ in range(10)]

# Fill every slot with that genre's chunked spectrograms and labels.
for g, i in dict_genres.items():
    X_data_genre[i], y_data_genre[i] = create_array(g)

Blues
Currently processing:  100
(1000, 128, 128) (1000,)
Classical
Currently processing:  100
(1000, 128, 128) (1000,)
Country
Currently processing:  100
(1000, 128, 128) (1000,)
Disco
Currently processing:  100
(1000, 128, 128) (1000,)
Hip-Hop
Currently processing:  100
(1000, 128, 128) (1000,)
Jazz




Couldn't process:  55
Currently processing:  100
(990, 128, 128) (990,)
Metal
Currently processing:  100
(1000, 128, 128) (1000,)
Pop
Currently processing:  100
(1000, 128, 128) (1000,)
Reggae
Currently processing:  100
(1000, 128, 128) (1000,)
Rock
Currently processing:  100
(1000, 128, 128) (1000,)


In [7]:
def shuffle_data(X_data, y_data):
    """Pair each sample with its label and return the pairs in random order.

    Parameters
    ----------
    X_data : array with a ``shape`` attribute
        Samples; ``X_data.shape[0]`` determines how many pairs are built.
    y_data : indexable
        Labels, indexed with the same positions as ``X_data``.

    Returns
    -------
    list of tuple
        ``(X_data[k], y_data[k])`` pairs, shuffled in place with the global
        ``random`` module state.
    """
    n_samples = X_data.shape[0]
    paired = [(X_data[idx], y_data[idx]) for idx in range(n_samples)]
    random.shuffle(paired)
    return paired

In [8]:
def prepare_data(data):
    """Split an iterable of ``(frames, labels)`` pairs into two parallel lists.

    Parameters
    ----------
    data : iterable of 2-item iterables
        Each item is unpacked into a sample and its label.

    Returns
    -------
    X, y : list, list
        Samples and labels, in the order they appeared in ``data``.
    """
    # Single pass over ``data`` so one-shot iterators are handled too.
    pairs = [(frames, labels) for frames, labels in data]
    X = [pair[0] for pair in pairs]
    y = [pair[1] for pair in pairs]
    return X, y

In [9]:
def partition_data(training_data, val_set_size=100, test_set_size=100):
    """Split ``(sample, label)`` pairs into train, validation and test arrays.

    The pairs are taken in the order given: the first ``val_set_size`` form
    the validation set, the next ``test_set_size`` the test set, and the rest
    the training set. The three set sizes are printed.

    Parameters
    ----------
    training_data : iterable of (sample, label) pairs
        Typically already shuffled by the caller.
    val_set_size : int, default 100
        Number of leading pairs used for validation.
    test_set_size : int, default 100
        Number of following pairs used for testing.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train, X_valid, y_valid, X_test, y_test)``.

    Raises
    ------
    ValueError
        If either size is negative.
    """
    if val_set_size < 0 or test_set_size < 0:
        raise ValueError("val_set_size and test_set_size must be non-negative")

    X_all, y_all = prepare_data(training_data)

    # Boundaries of the three consecutive slices.
    valid_end = val_set_size
    test_end = val_set_size + test_set_size

    # Break x apart into train, validation, and test sets
    X_valid = X_all[:valid_end]
    X_test = X_all[valid_end:test_end]
    X_train = X_all[test_end:]

    # Break y apart into train, validation, and test sets
    y_valid = y_all[:valid_end]
    y_test = y_all[valid_end:test_end]
    y_train = y_all[test_end:]

    print("Train set size: " + str(len(X_train)))
    print("Validation set size: " + str(len(X_valid)))
    print("Test set size: " + str(len(X_test)))

    return (np.array(X_train), np.array(y_train),
            np.array(X_valid), np.array(y_valid),
            np.array(X_test), np.array(y_test))

In [10]:
# One slot per genre (indexed by genre label) for each of the six outputs.
X_train_genre = [[] for _ in range(10)]
y_train_genre = [[] for _ in range(10)]
X_valid_genre = [[] for _ in range(10)]
y_valid_genre = [[] for _ in range(10)]
X_test_genre = [[] for _ in range(10)]
y_test_genre = [[] for _ in range(10)]

# Shuffle and partition each genre separately so every split gets the same
# number of validation/test chunks per genre.
# NOTE(review): data_split cuts each track into chunks before this shuffle,
# so chunks of one track can land in different splits -- confirm whether
# that overlap between train and validation/test is acceptable.
for g, i in dict_genres.items():
    print(g)
    training_data = shuffle_data(X_data_genre[i], y_data_genre[i])
    (X_train_genre[i], y_train_genre[i],
     X_valid_genre[i], y_valid_genre[i],
     X_test_genre[i], y_test_genre[i]) = partition_data(training_data)

Blues
Train set size: 800
Validation set size: 100
Test set size: 100
Classical
Train set size: 800
Validation set size: 100
Test set size: 100
Country
Train set size: 800
Validation set size: 100
Test set size: 100
Disco
Train set size: 800
Validation set size: 100
Test set size: 100
Hip-Hop
Train set size: 800
Validation set size: 100
Test set size: 100
Jazz
Train set size: 790
Validation set size: 100
Test set size: 100
Metal
Train set size: 800
Validation set size: 100
Test set size: 100
Pop
Train set size: 800
Validation set size: 100
Test set size: 100
Reggae
Train set size: 800
Validation set size: 100
Test set size: 100
Rock
Train set size: 800
Validation set size: 100
Test set size: 100


In [11]:
# Stack the ten per-genre arrays (in genre-label order) into a single array
# for each split. Each *_genre list holds exactly one array per genre.
X_train = np.concatenate(X_train_genre, axis=0)
y_train = np.concatenate(y_train_genre, axis=0)

X_valid = np.concatenate(X_valid_genre, axis=0)
y_valid = np.concatenate(y_valid_genre, axis=0)

X_test = np.concatenate(X_test_genre, axis=0)
y_test = np.concatenate(y_test_genre, axis=0)

In [12]:
### Convert the training data from dB back to power, then to natural log
# NOTE: X_train is rebound to X_train_log in a later cell, so re-running this
# cell after that point would convert already-converted data.
X_train_raw = librosa.db_to_power(X_train, ref=1.0)
X_train_log = np.log(X_train_raw)
# Sanity check: min / max / mean before and after the log.
print(X_train_raw.min(), X_train_raw.max(), X_train_raw.mean())
print(X_train_log.min(), X_train_log.max(), X_train_log.mean())

1e-08 1.0000008783668917 0.0035998650525228457
-18.420680743952367 8.783665059016772e-07 -9.850973296132196


In [13]:
# Same dB -> power -> natural-log conversion for the validation data.
X_valid_raw = librosa.db_to_power(X_valid, ref=1.0)
X_valid_log = np.log(X_valid_raw)
print(X_valid_raw.min(), X_valid_raw.max(), X_valid_raw.mean())
print(X_valid_log.min(), X_valid_log.max(), X_valid_log.mean())

1e-08 1.0000008783668917 0.0035179373293953045
-18.420680743952367 8.783665059016772e-07 -9.927011884488092


In [14]:
# Same dB -> power -> natural-log conversion for the test data.
X_test_raw = librosa.db_to_power(X_test, ref=1.0)
X_test_log = np.log(X_test_raw)
print(X_test_raw.min(), X_test_raw.max(), X_test_raw.mean())
print(X_test_log.min(), X_test_log.max(), X_test_log.mean())

1e-08 1.0000008783668917 0.003660368779788147
-18.420680743952367 8.783665059016772e-07 -9.807614811357396


In [15]:
# Continue with the log-scaled features; the label arrays are left as they are.
X_train = X_train_log
X_valid = X_valid_log
X_test = X_test_log

In [16]:
# Shuffle each split on its own (train, then valid, then test) so samples are
# no longer grouped by genre.
all_targets_sets_train, all_targets_sets_valid, all_targets_sets_test = (
    shuffle_data(features, labels)
    for features, labels in ((X_train, y_train), (X_valid, y_valid), (X_test, y_test))
)

In [17]:
# Unzip the shuffled (sample, label) pairs back into parallel lists.
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = map(
    prepare_data,
    (all_targets_sets_train, all_targets_sets_valid, all_targets_sets_test),
)

In [18]:
# Turn the lists back into NumPy arrays.
X_train, X_valid, X_test = map(np.array, (X_train, X_valid, X_test))
y_train, y_valid, y_test = map(np.array, (y_train, y_valid, y_test))

In [19]:
# One-hot encode the integer genre labels (10 classes) as integer arrays.
y_train, y_valid, y_test = (
    utils.to_categorical(labels, num_classes=10).astype(int)
    for labels in (y_train, y_valid, y_test)
)

In [20]:
# Report the final feature/label shapes of every split.
print("Train set shapes: ", *(arr.shape for arr in (X_train, y_train)))
print("Validation set shapes: ", *(arr.shape for arr in (X_valid, y_valid)))
print("Test set shapes: ", *(arr.shape for arr in (X_test, y_test)))

Train set shapes:  (7990, 128, 128) (7990, 10)
Validation set shapes:  (1000, 128, 128) (1000, 10)
Test set shapes:  (1000, 128, 128) (1000, 10)


In [21]:
# Persist every split; the arrays are passed positionally, features first and
# labels second.
for out_path, arrays in (
    ('data/all_targets_sets_train_new', (X_train, y_train)),
    ('data/all_targets_sets_valid_new', (X_valid, y_valid)),
    ('data/all_targets_sets_test_new', (X_test, y_test)),
):
    np.savez(out_path, *arrays)