In [1]:
import os

In [2]:
# Step up to the project root so the project packages (data_loader, ...) can be
# imported and the relative data paths used below resolve.
# Guarded on the presence of the data_loader package directory so that
# re-running this cell does not climb one more level each time.
if not os.path.isdir('data_loader'):
    os.chdir('..')

In [3]:
# Sanity check: show the entries of the current working directory.
os.listdir(os.curdir)

['.git',
 '.gitignore',
 '.idea',
 'data_loader',
 'data_processor',
 'jupyter',
 'loggers',
 'main.py',
 'models',
 'original_data',
 'processed_data',
 'README.md',
 'requirements.txt',
 'saved_models',
 'trainers']

In [4]:
from data_loader.music_wave_loader import get_music_file_names
from data_processor.utils import split_data, normalize_data, FunctionArrayExecutor, save_numpy_arrays, load_numpy_arrays
from data_processor.waves_data_processor import generate_wave_features
from loggers.statistics_loggers import print_stats
import speechbrain as sb
from speechbrain.processing.features import STFT, spectral_magnitude, Filterbank
import torch
import numpy as np

The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


In [5]:
# Gather the music file names for each listed genre from the dataset directory.
# The structure of the returned value is defined by get_music_file_names.
music_name_lists = get_music_file_names(
    'original_data/genres_original/',
    [
        'rock', 'classical', 'metal', 'disco', 'blues',
        'reggae', 'country', 'hiphop', 'jazz', 'pop',
    ],
)

In [6]:
# Debug aid: uncomment to inspect the collected file names.
#print(music_name_lists)

In [7]:
# Feature extractors configured for audio sampled at 22050 Hz.
# NOTE(review): SpeechBrain documents win_length / hop_length in milliseconds.
# If so, the hop (20) is larger than the window (15), which leaves part of the
# signal between consecutive frames unanalysed -- confirm this is intended.
compute_STFT = STFT(
    sample_rate=22050,
    n_fft=400,
    win_length=15,
    hop_length=20,
)

# 40 mel bands; every other Filterbank option is left at its library default.
# NOTE(review): verify those defaults (e.g. the frequency range) suit 22050 Hz.
compute_fbanks = Filterbank(sample_rate=22050, n_mels=40)

In [8]:
# Bundle the three feature steps into one executor object.
# Presumably FunctionArrayExecutor applies them in list order
# (STFT -> magnitude -> filterbank) -- confirm in data_processor.utils.
extract_wave_features = FunctionArrayExecutor(
    [
        compute_STFT,
        spectral_magnitude,
        compute_fbanks,
    ]
)

In [9]:
# Run the feature pipeline over every collected file and build the label tensor.
# NOTE(review): the meaning of the positional 10 is not visible here
# (presumably a segment length or count) -- confirm in generate_wave_features.
features, labels = generate_wave_features(
    music_name_lists,
    10,
    extract_wave_features,
    root='original_data/genres_original/',
)

In [10]:
# Summarise the raw features (shape, dtype, max/min/mean/std) before splitting.
print_stats(features)

Shape: (2988, 501, 40)
Dtype: torch.float32
 - Max:     39.016
 - Min:     -70.695
 - Mean:    -1.979
 - Std Dev: 13.695

tensor([[[ 7.1592e+00,  5.6211e+00,  5.2819e+00,  ..., -1.5695e+01,
          -2.1134e+01, -1.8806e+01],
         [ 5.1370e+00,  5.2218e+00,  7.1488e+00,  ..., -1.3223e+01,
          -1.1551e+01, -2.0770e+01],
         [ 7.8673e+00,  9.0765e+00,  7.9763e+00,  ..., -6.0891e+00,
          -5.5195e+00, -1.5894e+01],
         ...,
         [ 9.9964e+00,  6.9721e+00,  4.0837e+00,  ..., -4.2699e+00,
          -3.5919e+00, -4.2008e+00],
         [-8.5661e-01,  8.3650e-01,  8.0525e-02,  ...,  5.7160e-01,
           5.5786e-01, -3.8375e+00],
         [ 1.0599e+01,  1.1279e+01,  1.0326e+01,  ..., -9.2104e-01,
          -4.4086e+00, -7.8152e+00]],

        [[ 9.4522e+00,  1.1062e+01,  1.1633e+01,  ...,  2.6631e+00,
          -1.3678e+00, -1.4419e+00],
         [-8.6365e-01,  9.6416e+00,  1.2896e+01,  ...,  1.0327e+01,
           1.3919e+01,  5.5544e+00],
         [ 4.0896e-01,

In [11]:
# Show the label tensor's shape followed by its contents, one per line.
print(labels.shape, labels, sep='\n')

torch.Size([2988])
tensor([0., 0., 0.,  ..., 9., 9., 9.])


In [12]:
# Partition features and labels into train / validation / test subsets.
# NOTE(review): no seed is passed here; whether the split is reproducible
# depends on split_data's implementation -- confirm.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = split_data(features, labels)

In [13]:
# Report the size of each split.
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

# Hand the feature splits over to NumPy. The conversion is applied only while a
# split is still a torch tensor: this cell rebinds the same names, so without
# the guard a second run would fail with AttributeError on ndarray.numpy().
X_train, X_val, X_test = (
    split.numpy() if torch.is_tensor(split) else split
    for split in (X_train, X_val, X_test)
)

torch.Size([2390, 501, 40])
torch.Size([299, 501, 40])
torch.Size([299, 501, 40])


In [14]:
# Standardise the three feature splits (the stats printed afterwards show
# mean ~0 and std ~1).
# NOTE(review): X_test also reports mean ~0 / std ~1, which suggests each split
# is normalised with its own statistics rather than the training statistics --
# confirm in normalize_data, since that would differ from train-fitted scaling.
X_train, X_val, X_test = normalize_data(
    [X_train, X_val, X_test]
)

In [15]:
# Check the training split after normalisation (expect mean ~0, std ~1).
print_stats(X_train)

Shape: (2390, 501, 40)
Dtype: float32
 - Max:      3.490
 - Min:     -6.966
 - Mean:     0.000
 - Std Dev:  1.000

[[[ 0.4324319   0.5235686   0.539371   ...  0.7696717   0.70545137
    0.38661122]
  [ 0.7033844   0.6646656   0.48968515 ...  0.4557373   0.37470293
    0.28158787]
  [ 0.6878732   0.5070853   0.17866641 ...  0.11812218  0.21876432
    0.33382416]
  ...
  [ 0.96281433  0.99042165  0.913043   ...  0.3897332   0.30100504
    0.04069782]
  [ 0.81184167  0.8570251   0.7527065  ...  0.18112832  0.20391558
   -0.01947931]
  [ 0.47459522  0.54485965  0.494685   ... -0.20567213 -0.43880492
   -0.44469675]]

 [[ 0.4593697   0.46746537  0.36676162 ...  2.0687144   1.9302484
    1.8145031 ]
  [ 0.29386458  0.42053083  0.43999678 ...  1.4999044   1.5503665
    1.7049212 ]
  [ 0.16162515  0.50463533  0.5550691  ...  1.4331344   1.5867832
    1.5067407 ]
  ...
  [ 0.9495273   0.88628864  0.709363   ...  1.095192    0.98355347
    0.8689831 ]
  [ 1.0198374   0.91800696  0.7499961  ...  

In [16]:
# Check the test split after normalisation.
print_stats(X_test)

Shape: (299, 501, 40)
Dtype: float32
 - Max:      3.273
 - Min:     -6.894
 - Mean:    -0.000
 - Std Dev:  1.000

[[[-7.21423090e-01 -5.22287428e-01 -3.60289365e-01 ... -5.01120329e-01
   -6.41531169e-01 -5.87834418e-01]
  [-1.75856143e-01 -2.10751966e-01 -1.72389939e-01 ... -1.02217495e+00
   -1.16527343e+00 -1.31108689e+00]
  [-2.88261831e-01 -2.52403975e-01 -3.84938084e-02 ... -1.15346074e+00
   -1.37879086e+00 -1.40814006e+00]
  ...
  [ 9.33891654e-01  7.08390117e-01  4.79303271e-01 ...  7.02170849e-01
    6.73808157e-01  6.89280033e-01]
  [ 9.28216517e-01  7.55937576e-01  5.31520665e-01 ...  1.05713136e-01
    1.21589631e-01  2.33861119e-01]
  [ 8.27440143e-01  8.96312952e-01  9.26336646e-01 ... -7.18217641e-02
   -2.13481989e-02 -2.00158879e-02]]

 [[ 3.50939184e-01  3.99787575e-01  5.60280561e-01 ...  2.36264989e-02
    7.07784817e-02  1.01495169e-01]
  [ 2.95927107e-01  1.10305712e-01  3.61811966e-01 ...  1.67332247e-01
    2.37747326e-01  4.27217603e-01]
  [ 4.44775164e-01  1.

In [17]:
# Persist every split under the processed-data folder, pairing each array with
# its file name by position.
# NOTE(review): only the X_* splits were converted with .numpy() in this
# notebook; the y_* splits come straight from split_data -- confirm that
# save_numpy_arrays accepts them as-is.
save_numpy_arrays(
    [
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
    ],
    [
        'data_train_X.npy', 'data_train_y.npy',
        'data_val_X.npy', 'data_val_y.npy',
        'data_test_X.npy', 'data_test_y.npy',
    ],
    path_prefix='processed_data/processed_waves/10-genres/',
)

In [18]:
# Read the saved splits back from disk, in the same order they were written,
# and rebind the split names to the loaded arrays.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = load_numpy_arrays(
    [
        'data_train_X.npy', 'data_train_y.npy',
        'data_val_X.npy', 'data_val_y.npy',
        'data_test_X.npy', 'data_test_y.npy',
    ],
    path_prefix='processed_data/processed_waves/10-genres/',
)