In [1]:
from scipy.io.wavfile import read as read_wav
from IPython.display import Audio
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn as skl
import librosa as lr
import numpy as np
import logging
import random
import scipy
import os

In [2]:
# Seed every random number generator this notebook draws from. NumPy alone is
# not enough: the hyper-parameter search picks layer sizes with `random.choice`
# and the Keras layers draw their initial weights from TensorFlow's generator.
SEED = 5
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# `logging.warnings` is the standard-library `warnings` module as imported by
# `logging`; all warnings are silenced to keep the cell outputs readable.
logging.warnings.filterwarnings('ignore')

In [3]:
# Directory that contains the `dataset/` folder; the current working directory.
dataset_root = os.curdir

### Genre Dictionary:

In [4]:
# Genre name -> integer class id.
genres = dict(disco=0, country=1)

### Hyper Parameters:

In [5]:
# Settings shared by feature extraction, dataset slicing and the model.
hparams = dict(
    samplerate=22050,          # sample rate audio is loaded at
    seq_length=256,            # feature frames per training segment
    hop_length=512,            # hop length used by every librosa feature
    fft_window=2048,           # n_fft used by every librosa feature
    num_classes=len(genres),   # width of the softmax output layer
)

### Extract Features:

In [6]:
def extract_features(filepath, hparams):
    """Load one audio file and return its stacked spectral features.

    The file is loaded as mono at ``hparams['samplerate']`` and passed through
    ``librosa.util.normalize``. Four librosa features are then computed with
    the same hop length and FFT window and stacked in this row order:
    MFCC, chroma STFT, spectral centroid, spectral contrast.

    Args:
        filepath: Path of the audio file to load.
        hparams: Mapping providing 'samplerate', 'hop_length' and 'fft_window'.

    Returns:
        A 2-D array made of the four feature matrices stacked with ``np.vstack``.
    """
    # librosa.load returns (samples, sample_rate); only the samples are needed.
    signal, _ = lr.load(filepath, sr=hparams['samplerate'], mono=True)
    signal = lr.util.normalize(signal)

    # Every extractor receives exactly the same keyword arguments.
    shared_kwargs = dict(
        y=signal,
        sr=hparams['samplerate'],
        hop_length=hparams['hop_length'],
        n_fft=hparams['fft_window'],
    )

    extractors = (
        lr.feature.mfcc,
        lr.feature.chroma_stft,
        lr.feature.spectral_centroid,
        lr.feature.spectral_contrast,
    )

    return np.vstack([extract(**shared_kwargs) for extract in extractors])

### Disco Class:

In [7]:
# Genres whose clips are labelled as class 0.
class_1 = ['disco']

### Country Class:

In [8]:
# Genres whose clips are labelled as class 1.
class_2 = ['country']

### Allocating Arrays to Store Dataset:

In [9]:
# Pre-allocate the dataset buffers, zero-filled.
# Feature buffers are (rows, 40, seq_length) and label buffers are (rows, 2),
# with rows = 5 * 140 (train), 5 * 40 (validation) and 5 * 20 (test).
train_data_x, val_data_x, test_data_x = (
    np.zeros((5 * count, 40, hparams['seq_length'])) for count in (140, 40, 20)
)

train_data_y, val_data_y, test_data_y = (
    np.zeros((5 * count, 2)) for count in (140, 40, 20)
)

### Build or Load the Train, Validation and Test Datasets:

In [10]:
# Set to True to rebuild the dataset from the audio files and cache it under
# ./prepared; leave False to load the cached arrays instead.
preprocess = False

if preprocess:

    m = hparams['seq_length']   # feature frames per segment
    segments_per_file = 5       # consecutive segments cut from every file

    # (file slice within a genre folder, feature buffer, label buffer),
    # in the order train / validation / test.
    split_plan = [
        (slice(0, 70),   train_data_x, train_data_y),
        (slice(70, 90),  val_data_x,   val_data_y),
        (slice(90, 100), test_data_x,  test_data_y),
    ]

    # Next free row of each buffer, in the same order as `split_plan`.
    write_positions = [0, 0, 0]

    # Genres in class_1 are labelled 0, genres in class_2 are labelled 1.
    for label, class_genres in enumerate((class_1, class_2)):

        one_hot = tf.keras.utils.to_categorical(label, num_classes=2)

        for genre in class_genres:

            # Honour `dataset_root` for the directory listing as well as for
            # the file paths, and sort the listing: os.listdir returns names in
            # arbitrary order, which would make the split differ between runs.
            genre_dir = os.path.join(dataset_root, 'dataset', genre)
            filelist = sorted(os.listdir(genre_dir))

            for split_index, (file_slice, data_x, data_y) in enumerate(split_plan):
                for file in filelist[file_slice]:
                    features = extract_features(os.path.join(genre_dir, file), hparams)

                    # Fail with a clear message instead of an opaque broadcast
                    # error (or a silent broadcast of a one-frame slice).
                    if features.shape[1] < segments_per_file * m:
                        raise ValueError(
                            'File {} yields only {} feature frames; {} are required.'.format(
                                file, features.shape[1], segments_per_file * m
                            )
                        )

                    for i in range(segments_per_file):
                        row = write_positions[split_index]
                        data_x[row, :] = features[:, (i * m): ((i + 1) * m)]
                        data_y[row] = one_hot
                        write_positions[split_index] = row + 1

            # Reported once the genre's files have actually been processed.
            print('Genre:', genre, 'Processed.')

    # Keep the final row counters available under their original names.
    train_data_counter, val_data_counter, test_data_counter = write_positions

    # Cache the prepared arrays so later runs can skip feature extraction.
    os.makedirs('prepared', exist_ok=True)
    for array_name, array in (
        ('train_data_x', train_data_x),
        ('train_data_y', train_data_y),
        ('val_data_x', val_data_x),
        ('val_data_y', val_data_y),
        ('test_data_x', test_data_x),
        ('test_data_y', test_data_y),
    ):
        np.save('./prepared/' + array_name + '.npy', array, allow_pickle=False)
else:
    # Load the arrays cached by a previous run with preprocess = True.
    train_data_x = np.load('./prepared/train_data_x.npy', allow_pickle=False)
    train_data_y = np.load('./prepared/train_data_y.npy', allow_pickle=False)
    val_data_x = np.load('./prepared/val_data_x.npy', allow_pickle=False)
    val_data_y = np.load('./prepared/val_data_y.npy', allow_pickle=False)
    test_data_x = np.load('./prepared/test_data_x.npy', allow_pickle=False)
    test_data_y = np.load('./prepared/test_data_y.npy', allow_pickle=False)

Genre: disco Processed.
Genre: country Processed.


In [11]:
# Move axis 1 to position 2 in every feature array, so that the axis holding the
# frames of a segment comes before the axis holding the feature rows.
# NOTE: this cell is not idempotent; running it a second time swaps the axes back.
train_data_x, val_data_x, test_data_x = (
    np.moveaxis(split, 1, 2)
    for split in (train_data_x, val_data_x, test_data_x)
)

### Model Definition:

In [12]:
class GenreClassifierModel(tf.keras.Model):
    """Two stacked LSTM layers followed by a softmax classification layer.

    Args:
        l1: Number of units of the first LSTM layer, which returns the full
            output sequence.
        l2: Number of units of the second LSTM layer, which returns only its
            final output.
        num_classes: Number of units of the softmax output layer. When omitted
            it falls back to ``hparams['num_classes']``, matching the original
            two-argument constructor.
    """

    def __init__(self, l1, l2, num_classes=None):

        super().__init__()

        if num_classes is None:
            num_classes = hparams['num_classes']

        self.L1 = tf.keras.layers.LSTM(l1, return_sequences=True)
        self.L2 = tf.keras.layers.LSTM(l2, return_sequences=False)
        self.L3 = tf.keras.layers.Dense(units=num_classes, activation="softmax")

    def call(self, inputs):
        """Run `inputs` through both LSTM layers and the softmax layer.

        Args:
            inputs: Batch of feature sequences accepted by the first LSTM layer.

        Returns:
            The softmax output of the final dense layer.
        """
        # Intermediate activations stay in locals: storing per-call tensors on
        # `self` keeps stale tensors alive between calls and leaks them out of
        # the function Keras traces for fit/evaluate.
        sequence = self.L1(inputs)
        last_state = self.L2(sequence)
        return self.L3(last_state)

### Model Construction and Hyperparameter Search:

In [13]:
def create_model(config):
    """Draw random layer sizes from `config` and build a compiled classifier.

    Args:
        config: Mapping with 'L1' and 'L2' entries, each a non-empty sequence
            of candidate LSTM unit counts.

    Returns:
        A tuple ``(sizes, model)`` where ``sizes`` is ``{'L1': ..., 'L2': ...}``
        holding the drawn unit counts and ``model`` is the compiled
        ``GenreClassifierModel`` built from them.
    """
    # Draw L1 first, then L2, each with a single `random.choice` call.
    sizes = {layer: random.choice(config[layer]) for layer in ('L1', 'L2')}

    model = GenreClassifierModel(sizes['L1'], sizes['L2'])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return sizes, model

In [14]:
def grid_search(config, n_trials):
    """Train `n_trials` randomly configured models and record their validation scores.

    Each trial asks `create_model` for a model whose layer sizes are drawn at
    random from `config`, so this is a random search despite the name. The
    model is fitted on the training arrays for 50 epochs and evaluated on the
    validation arrays. One summary line per trial is appended to
    'train_info.txt'.

    Args:
        config: Mapping with 'L1' and 'L2' candidate lists, forwarded to
            `create_model`.
        n_trials: Number of models to train.

    Returns:
        A list with one dict per trial, holding the keys 'L1', 'L2',
        'val_loss', 'val_acc' and 'history' (the per-epoch metrics reported by
        `fit`).
    """
    train_stats = []

    for _ in range(n_trials):

        model_config, model = create_model(config)

        # Train Model:
        hist = model.fit(train_data_x, train_data_y, epochs=50, verbose=0, shuffle=True)

        # Evaluate Model:
        val_loss, val_acc = model.evaluate(val_data_x, val_data_y, verbose=0)

        train_stats.append({
            'L1': model_config['L1'],
            'L2': model_config['L2'],
            'val_loss': val_loss,
            'val_acc': val_acc,
            'history': hist.history
        })

        # Open the log only for the write, so each finished trial is flushed to
        # disk straight away and the handle is not held during training.
        with open('train_info.txt', 'a+') as file:
            print('L1: {}, L2: {}, Validation Loss: {}, Validation Accuracy: {}'.format(
                str(model_config['L1']).rjust(3),
                str(model_config['L2']).rjust(3),
                str(round(val_loss, 4)).rjust(7),
                str(round(val_acc, 4)).rjust(7)
            ), file=file)

    return train_stats

In [None]:
# Run 100 trials; both LSTM layers choose from the sizes 16, 20, ..., 124.
train_hist = grid_search(
    {
        'L1': list(range(16, 128, 4)),
        'L2': list(range(16, 128, 4)),
    },
    100,
)