In [1]:
import os

In [2]:
# Switch the working directory to the project root so that the project
# packages imported below (data_loader, data_processor, loggers) and the
# relative data paths resolve.
# Guarded on the presence of the `data_loader` package folder: the original
# unconditional chdir moved one directory higher on every re-run of this cell,
# which silently broke the imports and relative paths that follow.
if not os.path.isdir('data_loader'):
    os.chdir('..')

In [3]:
# Show the entries of the current working directory to verify the chdir above.
os.listdir(os.curdir)

['.git',
 '.idea',
 'data_loader',
 'data_processor',
 'jupyter',
 'loggers',
 'main.py',
 'models',
 'original_data',
 'processed_data',
 'README.md',
 'saved_models',
 'trainers']

In [18]:
from data_loader.music_wave_loader import get_music_file_names
from data_processor.data_processor_toolkit import split_data, normalize_data, FunctionArrayExecutor, save_numpy_arrays, load_numpy_arrays
from data_processor.waves_data_processor import generate_wave_features
from loggers.statistics_loggers import print_stats
import speechbrain as sb
from speechbrain.processing.features import STFT, spectral_magnitude, Filterbank
import torch
import numpy as np

In [5]:
# Collect the .wav file names for the four genres used in this experiment.
# Judging by the printed value, the result is a list of lists whose entries
# look like 'blues/blues.00000.wav'.
genres_root = 'original_data/genres_original/'
selected_genres = ['blues', 'classical', 'country', 'disco']
music_name_lists = get_music_file_names(genres_root, selected_genres)

In [6]:
# Show the collected file names.
# NOTE(review): this dumps every file name into the cell output; consider
# printing only the per-genre counts and the first few entries instead.
print(music_name_lists)

[['blues/blues.00000.wav', 'blues/blues.00001.wav', 'blues/blues.00002.wav', 'blues/blues.00003.wav', 'blues/blues.00004.wav', 'blues/blues.00005.wav', 'blues/blues.00006.wav', 'blues/blues.00007.wav', 'blues/blues.00008.wav', 'blues/blues.00009.wav', 'blues/blues.00010.wav', 'blues/blues.00011.wav', 'blues/blues.00012.wav', 'blues/blues.00013.wav', 'blues/blues.00014.wav', 'blues/blues.00015.wav', 'blues/blues.00016.wav', 'blues/blues.00017.wav', 'blues/blues.00018.wav', 'blues/blues.00019.wav', 'blues/blues.00020.wav', 'blues/blues.00021.wav', 'blues/blues.00022.wav', 'blues/blues.00023.wav', 'blues/blues.00024.wav', 'blues/blues.00025.wav', 'blues/blues.00026.wav', 'blues/blues.00027.wav', 'blues/blues.00028.wav', 'blues/blues.00029.wav', 'blues/blues.00030.wav', 'blues/blues.00031.wav', 'blues/blues.00032.wav', 'blues/blues.00033.wav', 'blues/blues.00034.wav', 'blues/blues.00035.wav', 'blues/blues.00036.wav', 'blues/blues.00037.wav', 'blues/blues.00038.wav', 'blues/blues.00039.wav'

In [7]:
# Feature-extraction front end built from SpeechBrain modules: an STFT stage
# and a 20-band mel filterbank, both configured for the same sample rate.
# NOTE(review): SpeechBrain's STFT documents win_length/hop_length in
# milliseconds; a hop (20) larger than the window (15) would leave stretches
# of audio between frames unanalysed -- confirm this is intended.
sample_rate_hz = 22050

compute_STFT = STFT(
    sample_rate=sample_rate_hz,
    win_length=15,
    hop_length=20,
    n_fft=400,
)
compute_fbanks = Filterbank(n_mels=20, sample_rate=sample_rate_hz)

In [8]:
# Bundle the processing stages: STFT, spectral magnitude, mel filterbank.
# Presumably FunctionArrayExecutor applies them in list order -- confirm
# against its implementation.
feature_pipeline_steps = [compute_STFT, spectral_magnitude, compute_fbanks]
extract_wave_features = FunctionArrayExecutor(feature_pipeline_steps)

In [9]:
# Run the feature pipeline over every collected file and obtain the feature
# tensor together with its labels.
# NOTE(review): the meaning of the positional argument 10 is not visible
# here -- confirm against generate_wave_features.
features, labels = generate_wave_features(
    music_name_lists,
    10,
    extract_wave_features,
    root='original_data/genres_original/',
)

In [10]:
# Summarise the raw features (the recorded output lists shape, dtype, max,
# min, mean and standard deviation, followed by the values themselves).
print_stats(features)

Shape: (800, 501, 20)
Dtype: torch.float32
 - Max:     39.279
 - Min:     -66.998
 - Mean:    -0.684
 - Std Dev: 14.762

tensor([[[ 11.9454,  12.5302,  12.1146,  ...,  -5.1765,  -4.0154,  -4.1931],
         [ 17.5069,  14.4472,  20.0307,  ...,  -1.2260,  -1.7406,  -3.6223],
         [ 27.1193,  22.0770,  17.5370,  ...,   2.7144,   1.1754,  -5.2868],
         ...,
         [ 19.3458,  13.9558,   4.8939,  ..., -17.3581, -12.9427, -18.6065],
         [ 17.4044,  10.9558,   5.4055,  ..., -10.0483, -10.1682, -13.6715],
         [  8.9726,  11.0729,   8.1804,  ...,  -5.1315,  -9.1288,  -7.8934]],

        [[ 13.3646,  10.5928,  11.0000,  ...,  -4.0338,  -2.4459,  -6.9524],
         [ 16.7657,  15.4793,  13.4194,  ...,  -5.1850,  -6.3028, -10.6822],
         [ 15.6548,  10.4879,   7.0636,  ...,  -4.4819,  -6.2419, -11.2642],
         ...,
         [ 11.1271,  15.1921,  10.7333,  ...,   0.9374,   1.9401,   9.1297],
         [ 15.7338,  13.7548,  10.5471,  ...,   2.3973,   1.8035,  11.1272],
  

In [11]:
# Partition features and labels into training, validation and test subsets.
# NOTE(review): no seed is passed here, so whether the split is reproducible
# depends on split_data -- confirm.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = split_data(features, labels)

In [12]:
# Report the size of each feature split.
for split in (X_train, X_val, X_test):
    print(split.shape)

# Hand the feature splits over to NumPy.
# The conversion is applied only while a split is still a torch tensor: the
# original unconditional `X = X.numpy()` raised AttributeError when the cell
# was executed a second time, because the names then already referred to
# NumPy arrays. `.detach().cpu()` also covers tensors that track gradients or
# live on another device; for plain CPU tensors it changes nothing.
X_train, X_val, X_test = [
    split.detach().cpu().numpy() if torch.is_tensor(split) else split
    for split in (X_train, X_val, X_test)
]

torch.Size([640, 501, 20])
torch.Size([80, 501, 20])
torch.Size([80, 501, 20])


In [13]:
# Normalise the three feature splits.
# NOTE(review): the recorded statistics show mean 0.000 and std 1.000 for
# X_test as well as for X_train, which suggests every split is scaled with
# its own statistics. Validation/test data are normally scaled with the
# training-set mean/std -- confirm against normalize_data.
# NOTE(review): the inputs are overwritten, so re-running this cell feeds
# already-normalised arrays back into normalize_data.
X_train, X_val, X_test = normalize_data([X_train, X_val, X_test])

In [14]:
# Summarise the normalised training features (shape, dtype, max, min, mean,
# standard deviation and the values, per the recorded output).
print_stats(X_train)

Shape: (640, 501, 20)
Dtype: float32
 - Max:      2.951
 - Min:     -6.516
 - Mean:    -0.000
 - Std Dev:  1.000

[[[-2.6792672  -3.5518866  -4.738893   ... -3.0967405  -3.5594199
   -3.4670434 ]
  [-3.1633809  -3.8502784  -4.652638   ... -2.4343402  -2.6495504
   -2.7823262 ]
  [-2.8878083  -3.4746716  -4.822417   ... -2.5756056  -2.5007057
   -2.7432652 ]
  ...
  [-0.24727474  0.06162699  0.49437433 ... -0.88487643 -1.025537
   -0.94578105]
  [-0.1620957   0.08019606  0.10297393 ... -0.95313245 -0.96736276
   -0.90410984]
  [-0.2025087  -0.11488556  0.03381225 ... -0.23887278 -0.23792648
   -0.18730788]]

 [[-1.6208766  -1.382843   -0.7747273  ... -1.4323992  -1.3947951
   -1.3322134 ]
  [-1.4203833  -1.2709665  -1.0766394  ... -1.1706297  -1.1788316
   -1.5762501 ]
  [-1.5573021  -0.53738594  0.06833315 ... -0.95460486 -1.0789422
   -1.341246  ]
  ...
  [-2.0320206  -0.7475014   0.11836525 ...  0.1985635  -0.16320008
   -0.27016926]
  [-2.1439638  -1.0544697  -0.1505503  ...  0.6728

In [15]:
# Summarise the normalised test features for comparison with the training
# split (shape, dtype, max, min, mean, standard deviation and the values).
print_stats(X_test)

Shape: (80, 501, 20)
Dtype: float32
 - Max:      2.743
 - Min:     -5.770
 - Mean:    -0.000
 - Std Dev:  1.000

[[[-1.82153076e-01 -6.37598634e-01 -1.51593888e+00 ...  2.35316530e-01
    4.72074270e-01  1.24815576e-01]
  [-1.03896987e+00 -1.70838308e+00 -2.36451912e+00 ...  1.95190832e-01
    1.75815374e-01  1.38420016e-01]
  [-1.08310401e+00 -7.38031745e-01 -1.16148233e+00 ... -8.36568847e-02
   -1.16922565e-01 -3.35032851e-01]
  ...
  [ 2.33404845e-01 -4.02267501e-02  3.86529937e-02 ...  1.46256173e+00
    1.63436425e+00  7.65903234e-01]
  [ 1.16689301e+00  1.01222789e+00  8.66562963e-01 ...  2.16707015e+00
    1.88816071e+00  1.56275105e+00]
  [ 1.72049749e+00  1.23933566e+00  4.05363560e-01 ...  1.57361436e+00
    1.40223122e+00  1.20750535e+00]]

 [[ 8.92508447e-01  4.87258226e-01  1.06979832e-02 ...  6.94806576e-01
    4.50920999e-01  3.63732219e-01]
  [ 4.27473485e-01  2.30457798e-01  7.20724940e-01 ...  1.20730710e+00
    9.12641943e-01  7.40972102e-01]
  [ 9.95525241e-01  1.2

In [16]:
# Persist the six arrays under the 4-genre output folder.
# Arrays and file names are presumably matched by position -- confirm
# against save_numpy_arrays.
arrays_to_save = [X_train, y_train, X_val, y_val, X_test, y_test]
output_file_names = [
    'data_train_X.npy', 'data_train_y.npy',
    'data_val_X.npy', 'data_val_y.npy',
    'data_test_X.npy', 'data_test_y.npy',
]
save_numpy_arrays(
    arrays_to_save,
    output_file_names,
    path_prefix='processed_data/processed_waves/4-genres/',
)

In [19]:
# Read the six arrays back from the 4-genre output folder, using the same
# file names and order as when they were saved.
saved_file_names = [
    'data_train_X.npy', 'data_train_y.npy',
    'data_val_X.npy', 'data_val_y.npy',
    'data_test_X.npy', 'data_test_y.npy',
]
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = load_numpy_arrays(
    saved_file_names,
    path_prefix='processed_data/processed_waves/4-genres/',
)