In [1]:
import os

In [2]:
# Work from the project root so the project-local packages imported further
# down (data_loader, data_processor, loggers) and the relative data paths
# resolve.
# The guard makes this cell safe to re-run: an unconditional chdir('..')
# climbs one directory higher on every execution, which silently breaks all
# later relative paths. We only step up while the project's `data_loader`
# folder is not visible from the current working directory.
if not os.path.isdir('data_loader'):
    os.chdir('..')

In [3]:
# Show what is in the current working directory as a quick check that the
# notebook is running from the expected folder.
os.listdir(os.curdir)

['.git',
 '.gitignore',
 '.idea',
 'data_loader',
 'data_processor',
 'jupyter',
 'loggers',
 'main.py',
 'models',
 'original_data',
 'processed_data',
 'README.md',
 'requirements.txt',
 'saved_models',
 'trainers']

In [4]:
from data_loader.music_wave_loader import get_music_file_names
from data_processor.utils import split_data, normalize_data, FunctionArrayExecutor, save_numpy_arrays, load_numpy_arrays
from data_processor.waves_data_processor import generate_wave_features
from loggers.statistics_loggers import print_stats
import speechbrain as sb
from speechbrain.processing.features import STFT, spectral_magnitude, Filterbank
import torch
import numpy as np

The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


In [5]:
# Ask the project loader for the music file names, passing the dataset root
# folder and the ten genre names.
# NOTE(review): the labels printed later run from 0 to 9; presumably they
# follow the order of this list -- confirm in generate_wave_features before
# reordering the genres.
music_name_lists = get_music_file_names(
    'original_data/genres_original/',
    [
        'rock',
        'classical',
        'metal',
        'disco',
        'blues',
        'reggae',
        'country',
        'hiphop',
        'jazz',
        'pop',
    ],
)

In [6]:
# Debugging aid, intentionally disabled: uncomment to inspect the collected file names.
# print(music_name_lists)

In [7]:
# Front-end settings shared by the STFT and the mel filterbank. Each value is
# named once so the two stages cannot silently drift apart (the sample rate
# used to be repeated, and the FFT size was only implicitly shared through
# Filterbank's default).
SAMPLE_RATE = 22050   # Hz
WIN_LENGTH_MS = 15    # analysis window length; SpeechBrain's STFT takes milliseconds
HOP_LENGTH_MS = 20    # step between successive windows, in milliseconds
N_FFT = 400           # FFT size; must be identical for STFT and Filterbank
N_MELS = 40           # number of mel bands

# NOTE(review): the hop (20 ms) is longer than the window (15 ms), so adjacent
# frames do not overlap and a slice of signal between frames is never
# analysed. The values are kept unchanged here because the shape of the
# generated features depends on them -- confirm this configuration is intended.
compute_STFT = STFT(
    sample_rate=SAMPLE_RATE,
    win_length=WIN_LENGTH_MS,
    hop_length=HOP_LENGTH_MS,
    n_fft=N_FFT,
)

# n_fft is passed explicitly (400 is also Filterbank's default) so that the
# filterbank always matches the STFT above if N_FFT is ever changed.
compute_fbanks = Filterbank(n_mels=N_MELS, sample_rate=SAMPLE_RATE, n_fft=N_FFT)

In [8]:
# Bundle the three feature stages into a single callable object.
# Presumably FunctionArrayExecutor applies them in list order
# (STFT -> spectral magnitude -> mel filterbank) -- confirm in
# data_processor.utils.
extract_wave_features = FunctionArrayExecutor(
    [
        compute_STFT,
        spectral_magnitude,
        compute_fbanks,
    ]
)

In [9]:
# Build the feature tensor and the matching label tensor for every listed file.
# NOTE(review): the meaning of the second positional argument (10) is not
# visible from this notebook -- check generate_wave_features and consider
# passing it by keyword.
features, labels = generate_wave_features(
    music_name_lists,
    10,
    extract_wave_features,
    root='original_data/genres_original/',
)

In [10]:
# Summarise the raw features before splitting: the helper prints shape, dtype,
# max, min, mean and standard deviation, followed by the tensor itself.
print_stats(features)

Shape: (1998, 501, 40)
Dtype: torch.float32
 - Max:     39.036
 - Min:     -73.367
 - Mean:    -1.915
 - Std Dev: 13.666

tensor([[[ 3.5133e+00, -1.2981e+00,  8.1626e+00,  ..., -3.0039e+00,
          -2.0799e+00, -5.8768e+00],
         [ 1.4190e+01,  1.6673e+01,  1.5811e+01,  ..., -7.6798e-01,
          -2.6241e+00, -4.9942e+00],
         [ 2.1896e+01,  2.3109e+01,  2.0915e+01,  ..., -5.2069e-01,
          -1.4567e+00, -6.8625e+00],
         ...,
         [ 5.6874e-01,  1.0006e+01,  1.2583e+01,  ..., -2.0062e+01,
          -2.2445e+01, -2.1166e+01],
         [ 2.0158e+00,  1.0099e+01,  9.4656e+00,  ..., -9.9781e-01,
           5.2829e+00,  2.1455e+00],
         [-2.6946e+00,  4.0216e-01,  2.3290e+00,  ..., -3.3526e+00,
          -4.8685e+00, -5.0777e+00]],

        [[-1.4997e+00,  3.8803e-01,  1.5761e+00,  ..., -1.0176e+01,
          -8.2036e+00, -8.7734e+00],
         [ 6.7605e+00,  5.3150e+00,  9.2313e-01,  ..., -7.8166e-01,
           1.1123e-01, -3.8280e+00],
         [ 9.1284e+00,

In [11]:
# Show the label tensor's shape and then its contents, one per line.
print(labels.shape, labels, sep='\n')

torch.Size([1998])
tensor([0., 0., 0.,  ..., 9., 9., 9.])


In [12]:
# Split features and labels into train / validation / test parts.
# NOTE(review): no random seed is set anywhere in this notebook; if
# split_data shuffles, the split will differ between runs -- confirm.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = split_data(features, labels)

In [13]:
# Report the size of each feature split.
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

# Convert the feature splits to NumPy arrays for the following steps.
# np.asarray is used instead of Tensor.numpy() because this cell rebinds the
# same names: on a second run X_train is already an ndarray, and
# ndarray.numpy() does not exist, so the cell used to crash with
# AttributeError. np.asarray accepts both a CPU tensor and an ndarray, which
# makes the cell idempotent. The label splits are intentionally left untouched.
X_train = np.asarray(X_train)
X_val = np.asarray(X_val)
X_test = np.asarray(X_test)

torch.Size([1598, 501, 40])
torch.Size([200, 501, 40])
torch.Size([200, 501, 40])


In [14]:
# Standardise the three feature splits; the results replace the raw arrays.
# NOTE(review): the stats printed afterwards show mean ~0 and std ~1 for the
# test split as well as the training split, which hints that each split may be
# normalised with its own statistics rather than the training set's -- verify
# in normalize_data.
# NOTE(review): the inputs are overwritten, so re-running this cell alone
# normalises already-normalised data.
X_train, X_val, X_test = normalize_data(
    [X_train, X_val, X_test]
)

In [15]:
# Sanity-check the normalised training split: mean should be ~0 and std ~1.
print_stats(X_train)

Shape: (1598, 501, 40)
Dtype: float32
 - Max:      3.460
 - Min:     -6.844
 - Mean:     0.000
 - Std Dev:  1.000

[[[ 7.35406935e-01  2.54999727e-01 -1.88041434e-01 ...  1.27193704e-01
    2.01191202e-01  4.47644979e-01]
  [ 3.91853452e-01  2.24427536e-01  5.82982838e-01 ...  2.48386070e-01
    3.30310047e-01  1.30362183e-01]
  [-3.88173759e-01  4.90773767e-01  5.90284586e-01 ...  2.00718582e-01
    1.00067779e-01  7.10427528e-04]
  ...
  [ 1.67368686e+00  1.55796874e+00  1.29960525e+00 ...  1.29509658e-01
    4.78974521e-01  4.53971148e-01]
  [ 1.35608017e+00  1.12372386e+00  9.48941827e-01 ...  6.57920763e-02
    3.72850269e-01  2.95833498e-01]
  [ 8.84513855e-01  6.62384093e-01  3.97510171e-01 ... -3.60395052e-02
    7.22623914e-02  2.37749055e-01]]

 [[-1.03603208e+00 -1.67629087e+00 -1.60006809e+00 ... -7.50090063e-01
   -5.86618304e-01 -6.59124196e-01]
  [-5.41222543e-02 -4.83659685e-01 -9.08874035e-01 ... -3.63874257e-01
   -3.13451022e-01 -4.38841581e-01]
  [-1.80673391e-01 -5

In [16]:
# Sanity-check the normalised test split.
# NOTE(review): the output reports mean -0.000 and std 1.000, the same as the
# training split. That is what one would see if each split were standardised
# with its own statistics; if normalize_data is meant to apply the training
# set's mean/std to val and test, verify its implementation.
print_stats(X_test)

Shape: (200, 501, 40)
Dtype: float32
 - Max:      3.326
 - Min:     -5.936
 - Mean:    -0.000
 - Std Dev:  1.000

[[[-1.1490712  -1.1427468  -0.9874597  ... -0.5393     -0.15737502
   -0.6711999 ]
  [-1.2937613  -0.7250776  -0.8182667  ... -0.37402272  0.03811132
   -0.46329606]
  [-1.1102947  -0.95769125 -0.8623102  ... -0.11859881 -0.156998
   -0.44938514]
  ...
  [-1.4167888  -1.6317103  -1.2922384  ... -1.0983855  -1.5538368
   -1.4640696 ]
  [-1.4743669  -1.5104333  -1.7809726  ... -1.0580751  -1.3442246
   -1.4269443 ]
  [-0.55989337 -0.5085717  -0.36091593 ... -1.0561788  -1.2181398
   -1.1239434 ]]

 [[-1.2201456  -1.6191936  -0.92248714 ...  0.17684877  0.08223601
    0.13250767]
  [-1.6468025  -0.9801851  -0.46304196 ... -0.45198843 -0.6854435
   -0.8881606 ]
  [-1.948152   -1.2492596  -0.90752363 ... -0.63942915 -0.9434645
   -1.1977116 ]
  ...
  [ 0.02203666  0.05676341 -0.1708068  ... -0.49207518 -0.6624848
   -0.434073  ]
  [-0.09305435  0.01765893 -0.20565638 ... -0.5344

In [17]:
# Persist every split under the processed-data folder. The file names are
# listed in the same order as the arrays they belong to.
save_numpy_arrays(
    [
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
    ],
    [
        'data_train_X.npy', 'data_train_y.npy',
        'data_val_X.npy', 'data_val_y.npy',
        'data_test_X.npy', 'data_test_y.npy',
    ],
    path_prefix='processed_data/processed_waves/10-genres/',
)

In [18]:
# Read the saved splits back from disk, replacing the in-memory variables.
# The file names are given in the order train / val / test, X before y.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = load_numpy_arrays(
    [
        'data_train_X.npy', 'data_train_y.npy',
        'data_val_X.npy', 'data_val_y.npy',
        'data_test_X.npy', 'data_test_y.npy',
    ],
    path_prefix='processed_data/processed_waves/10-genres/',
)