In [1]:
from scipy.io.wavfile import read as read_wav
from IPython.display import Audio
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn as skl
import librosa as lr
import numpy as np
import logging
import random
import scipy
import os

In [2]:
np.random.seed(5)
logging.warnings.filterwarnings('ignore')

In [3]:
dataset_root = '.'

In [4]:
genres = {
    'disco',
    'country',
    'jazz',
    'classical',
    'blues',
    'hiphop',
    'metal',
    'rock',
    'pop',
    'reggae'
}

### Hyper Parameters:

In [5]:
hparams = {
    'samplerate': 22050,
    'seq_length': 256,
    'hop_length': 512,
    'fft_window': 2048
}

### Extract Features:

In [6]:
def extract_features(filepath, hparams):
    
    audio_data_array = lr.load(filepath, sr=hparams['samplerate'], mono=True)
    
    features = [
        
        # MFCC Features:
        lr.feature.mfcc(
            y          = audio_data_array[0],
            sr         = hparams['samplerate'],
            hop_length = hparams['hop_length'],
            n_fft      = hparams['fft_window']
        ),

        # Chroma STFT:
        lr.feature.chroma_stft(
            y          = audio_data_array[0],
            sr         = hparams['samplerate'],
            hop_length = hparams['hop_length'],
            n_fft      = hparams['fft_window']
        ),
        
        # Spectral Centroid:
        lr.feature.spectral_centroid(
            y          = audio_data_array[0],
            sr         = hparams['samplerate'],
            hop_length = hparams['hop_length'],
            n_fft      = hparams['fft_window']
        ),
        
        # Spectral Contrast:
        lr.feature.spectral_contrast(
            y          = audio_data_array[0],
            sr         = hparams['samplerate'],
            hop_length = hparams['hop_length'],
            n_fft      = hparams['fft_window']
        )
    ]
    
    return np.vstack(features)

### Preprocessing:

In [7]:
processed = []

In [8]:
preprocess = True

if preprocess == True:
    
    counter = 0
    
    for genre in genres:
        
        print('================================================')
        print('PREPARING: Genre: {}'.format(genre))
        
        m = hparams['seq_length']
        filelist = os.listdir(dataset_root + '/dataset/' + genre)
        
        for file in filelist:
            features = extract_features(dataset_root + '/dataset/' + genre + '/' + file, hparams)
            
            for i in range(5):
                
                json = {
                    'index': (counter // 5) % 100,
                    'features': np.moveaxis(features[:, (i * m): ((i+1) * m)], 0, 1),
                    'ground_truth': genre
                }
                
                processed += [json]
                counter += 1
        
        print('COMPLETE: Genre: {}'.format(genre))
                
    if 'prepared' not in os.listdir('.'):
        os.mkdir('prepared')
    np.save('./prepared/processed.npy', processed)
    
else:
    train_data_x = np.load('./prepared/processed.npy')

PREPARING: Genre: jazz
COMPLETE: Genre: jazz
PREPARING: Genre: hiphop
COMPLETE: Genre: hiphop
PREPARING: Genre: metal
COMPLETE: Genre: metal
PREPARING: Genre: country
COMPLETE: Genre: country
PREPARING: Genre: disco
COMPLETE: Genre: disco
PREPARING: Genre: pop
COMPLETE: Genre: pop
PREPARING: Genre: reggae
COMPLETE: Genre: reggae
PREPARING: Genre: rock
COMPLETE: Genre: rock
PREPARING: Genre: blues
COMPLETE: Genre: blues
PREPARING: Genre: classical
COMPLETE: Genre: classical
