In [1]:
import os

In [2]:
os.chdir('..')

In [3]:
os.listdir()

['.git',
 '.gitignore',
 '.idea',
 'data_loader',
 'data_processor',
 'jupyter',
 'loggers',
 'main.py',
 'models',
 'original_data',
 'processed_data',
 'README.md',
 'requirements.txt',
 'saved_models',
 'trainers']

In [4]:
from data_loader.music_wave_loader import get_music_file_names
from data_processor.utils import split_data, normalize_data, FunctionArrayExecutor, save_numpy_arrays, load_numpy_arrays
from data_processor.waves_data_processor import generate_wave_features
from loggers.statistics_loggers import print_stats
import speechbrain as sb
from speechbrain.processing.features import STFT, spectral_magnitude, Filterbank
import torch
import numpy as np

The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


In [5]:
music_name_lists = get_music_file_names('original_data/genres_original/', 
                                        ['blues', 'classical', 'country', 'disco'])

In [6]:
#print(music_name_lists)

In [7]:
compute_STFT = STFT(
    sample_rate=22050, win_length=15, hop_length=20, n_fft=400
)

compute_fbanks = Filterbank(n_mels=40, sample_rate = 22050)

In [8]:
extract_wave_features = FunctionArrayExecutor([compute_STFT, spectral_magnitude, compute_fbanks])

In [9]:
features, labels = generate_wave_features(music_name_lists, 10, extract_wave_features, root='original_data/genres_original/')

In [10]:
print_stats(features)

Shape: (800, 501, 40)
Dtype: torch.float32
 - Max:     37.386
 - Min:     -73.367
 - Mean:    -4.458
 - Std Dev: 15.115

tensor([[[  5.1591,   9.7620,  11.2346,  ...,  -5.3144,  -8.4424,  -6.2970],
         [ 17.7271,  13.2259,  12.1101,  ...,  -7.6559,  -8.1737,  -3.6109],
         [ 25.0152,  24.5609,  22.3748,  ...,  -8.7879,  -8.4183,  -7.1700],
         ...,
         [ 16.7082,  16.9904,  14.8524,  ..., -20.1472, -22.0781, -21.5942],
         [ 14.8163,  15.1201,  12.2718,  ..., -14.7370, -17.8763, -16.2846],
         [  2.6044,   6.6420,   8.4733,  ..., -11.2405, -10.6533, -10.4793]],

        [[ 11.4552,  10.6462,   8.9892,  ...,  -5.8976, -12.1851, -11.3628],
         [ 13.9335,  14.3317,  13.1824,  ..., -13.5134, -12.4519, -17.0976],
         [ 13.6456,  13.0926,  10.6186,  ..., -10.6267, -15.3692, -17.0501],
         ...,
         [  6.1001,   7.4307,  12.4845,  ...,   2.3091,   7.8351,   4.3013],
         [ 12.4111,  13.3474,  12.8156,  ...,   2.6472,   9.5141,   7.9672],
  

In [11]:
print(labels.shape)
print(labels)

torch.Size([800])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1.

In [12]:
X_train, y_train, X_val, y_val, X_test, y_test = split_data(features, labels)

In [13]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
X_train = X_train.numpy()
X_val = X_val.numpy()
X_test = X_test.numpy()

torch.Size([640, 501, 40])
torch.Size([80, 501, 40])
torch.Size([80, 501, 40])


In [14]:
X_train, X_val, X_test = normalize_data([X_train, X_val, X_test])

In [15]:
print_stats(X_train)

Shape: (640, 501, 40)
Dtype: float32
 - Max:      3.120
 - Min:     -6.313
 - Mean:    -0.000
 - Std Dev:  1.000

[[[-2.2603955  -2.6279683  -3.0352345  ... -3.4320953  -3.3666427
   -3.3074427 ]
  [-2.565234   -3.0801253  -3.3854938  ... -2.7341614  -2.6954138
   -2.6208115 ]
  [-2.7137022  -2.682478   -2.8931212  ... -2.6997137  -2.6482296
   -2.5788894 ]
  ...
  [-1.4594572  -0.18837595  0.03816053 ... -1.2722921  -0.8131171
   -1.0031278 ]
  [-1.18371    -0.13738212  0.16577199 ... -1.1046832  -0.7847943
   -0.9522553 ]
  [-0.24282342 -0.16645542 -0.1290172  ... -0.17861882 -0.16073951
   -0.16302545]]

 [[-1.450081   -1.5701135  -1.6312584  ... -1.353962   -1.2994477
   -1.2697017 ]
  [-1.7031051  -1.3293248  -1.1274819  ... -1.3260882  -1.6932185
   -1.6551706 ]
  [-1.5423517  -1.6933872  -1.0883901  ... -1.1058494  -1.4281152
   -1.3857387 ]
  ...
  [-1.5255969  -1.9334999  -2.398081   ... -0.45181632 -0.33623195
   -0.01154986]
  [-1.7783931  -1.9952062  -2.2240667  ... -0.6589

In [16]:
print_stats(X_test)

Shape: (80, 501, 40)
Dtype: float32
 - Max:      3.001
 - Min:     -5.737
 - Mean:     0.000
 - Std Dev:  1.000

[[[-0.00843373 -0.16407987 -0.35454273 ...  0.01356961  0.25430247
   -0.04420633]
  [-0.794247   -0.8696532  -1.1995608  ...  0.04398429  0.19106206
    0.19056077]
  [-0.6424043  -1.2959164  -0.7824867  ... -0.302099   -0.30073217
   -0.33161345]
  ...
  [ 0.5197813   0.22296141  0.05666156 ...  0.6617579   0.78970486
    0.8622248 ]
  [ 1.1184468   1.230051    1.1536006  ...  1.6380352   1.601691
    1.4871472 ]
  [ 1.8192701   1.6836054   1.4473128  ...  1.2941493   1.2192361
    1.1516905 ]]

 [[ 1.0874711   0.86977625  0.65228176 ...  0.32489803  0.38995054
    0.43021625]
  [ 0.81258947  0.4120052   0.09732649 ...  0.7051672   0.8376521
    0.58030796]
  [ 0.1722575   1.0288732   1.2430522  ...  0.9216234   0.612769
    0.53581053]
  ...
  [ 1.3794888   1.2383139   0.90612286 ...  1.4794487   1.2505019
    1.3551399 ]
  [ 0.9436676   0.42402577  0.1282185  ...  1.3817

In [17]:
save_numpy_arrays([X_train, y_train, X_val, y_val, X_test, y_test], 
                  ['data_train_X.npy','data_train_y.npy', 'data_val_X.npy','data_val_y.npy', 'data_test_X.npy', 'data_test_y.npy'],
                 path_prefix='processed_data/processed_waves/4-genres/')

In [18]:
X_train, y_train, X_val, y_val, X_test, y_test = load_numpy_arrays(['data_train_X.npy','data_train_y.npy', 'data_val_X.npy','data_val_y.npy', 'data_test_X.npy', 'data_test_y.npy']
                  , path_prefix='processed_data/processed_waves/4-genres/')