# Vanilla Binary RNN Model

This notebook trains a simple vanilla RNN model for binary classification of Piano versus Acoustic Guitar recordings (IRMAS class labels `pia` and `gac`, the two classes loaded below). We chose these two classes because their sample counts are close to each other, so the class-imbalance issue is largely avoided up front and does not need separate handling.

### Importing Required Libraries

This section imports the libraries required to train the Vanilla RNN binary classification model.

In [4]:
import numpy as np
import pandas as pd
import os
import glob
from keras.models import Sequential, Input
from keras.layers import SimpleRNN, Dense
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable
RANDOM_SEED = 7
np.random.seed(RANDOM_SEED)

### Training Data Creator

This section uses the script from the training data creation module to load the training data

In [9]:
import numpy as np
import os
import glob
from scipy.io import wavfile
from typing import List


def exist_directory(directory_path: str):
    """Report whether ``directory_path`` refers to an existing directory.

    :param directory_path: path to check
    :return: ``True`` if the path is a directory, ``False`` otherwise
    """
    is_directory = os.path.isdir(directory_path)
    return is_directory


def exist_file(file_path: str):
    """Report whether ``file_path`` refers to an existing regular file.

    :param file_path: path to check
    :return: ``True`` if the path is a file, ``False`` otherwise
    """
    is_file = os.path.isfile(file_path)
    return is_file


def get_directory_contents(directory_path: str, pattern: str):
    """List the entries of ``directory_path`` that match a glob ``pattern``.

    The pattern is evaluated with the process working directory temporarily
    switched to ``directory_path``, so the returned names are relative to that
    directory rather than prefixed with it.

    :param directory_path: directory whose contents should be listed
    :param pattern: glob pattern, e.g. ``'*.wav'``
    :return: list of matching names; an empty list if ``directory_path`` is not
             an existing directory
    """
    if not os.path.isdir(directory_path):
        return []

    # Remember where we started so the process can be put back afterwards
    original_working_directory = os.getcwd()
    os.chdir(directory_path)
    try:
        return glob.glob(pattern)
    finally:
        # Always restore the working directory, even if globbing raises;
        # otherwise every later relative path in the notebook would silently
        # resolve against the wrong directory.
        os.chdir(original_working_directory)
    

def get_subdirectory_names(directory_path: str):
    """Return the names of the immediate sub-directories of ``directory_path``.

    :param directory_path: directory to inspect
    :return: list of sub-directory names without any trailing path separator
    """
    # The '*//' pattern makes glob match directories only; each match comes back
    # with a trailing separator ('name/' on POSIX, 'name\\' on Windows). Strip
    # just that trailing separator so the bare name can be used as a class label.
    return [subdirectory_name.rstrip('/\\')
            for subdirectory_name in get_directory_contents(directory_path, '*//')]


def get_file_names(directory_path: str, file_extension: str):
    """List the entries of ``directory_path`` whose names end with ``file_extension``.

    :param directory_path: directory to search
    :param file_extension: suffix to match, e.g. ``'.wav'``
    :return: whatever ``get_directory_contents`` returns for the pattern
             ``*<file_extension>``
    """
    extension_pattern = f'*{file_extension}'
    return get_directory_contents(directory_path, extension_pattern)


def construct_path(directory_path: str, directory_content_name: str):
    """Join a directory path and the name of one of its entries into a single path.

    :param directory_path: the containing directory
    :param directory_content_name: name of a file or sub-directory inside it
    :return: the combined path as produced by ``os.path.join``
    """
    path_components = (directory_path, directory_content_name)
    return os.path.join(*path_components)


def read_wav_file(file_path: str):
    """Read a WAV file and return what ``scipy.io.wavfile.read`` returns for it.

    :param file_path: path of the WAV file
    :return: the result of ``wavfile.read(file_path)``; if the file does not
             exist, the placeholder pair ``(44100, empty array)`` instead
    """
    # Guard clause: a missing file yields a fixed rate and an empty signal
    if not exist_file(file_path):
        return 44100, np.array([])
    return wavfile.read(file_path)
    

def get_sound_signals(wav_file_data):
    """Extract the signal part from WAV file data.

    :param wav_file_data: indexable sequence, e.g. the pair returned by ``read_wav_file``
    :return: the last element of ``wav_file_data``
    """
    last_position = -1
    return wav_file_data[last_position]


def shift_sound_signals(sound_signals: np.array):
    """Shift sample values up by 2**15 and return them as 32-bit integers.

    For signed 16-bit samples this maps the range [-32768, 32767] onto
    [0, 65535], i.e. non-negative values.

    :param sound_signals: array of sample values
    :return: ``sound_signals + 2**15`` converted to ``np.int32``
    """
    signals = np.asarray(sound_signals)
    if signals.dtype.kind in 'iu' and signals.dtype.itemsize < 8:
        # Widen narrow integer samples before adding the offset: 2**15 does not
        # fit in int16, and NumPy 2 promotion rules raise OverflowError instead
        # of silently up-casting the array as older NumPy versions did.
        signals = signals.astype(np.int64)
    return np.int32(signals + 2**15)


def normalize_sound_signals(sound_signals: np.array):
    """Scale sample values by 1 / 2**15 and return them as 32-bit floats.

    :param sound_signals: array of sample values
    :return: ``sound_signals / 2**15`` converted to ``np.float32``
    """
    full_scale = 2. ** 15
    scaled_signals = sound_signals / full_scale
    return np.float32(scaled_signals)


def get_left_channel_data(sound_signals: np.array):
    """Select the first column of a two-dimensional signal array.

    :param sound_signals: array indexed as ``[sample, channel]``
    :return: the column at channel index 0
    """
    left_channel_index = 0
    return sound_signals[:, left_channel_index]


def get_right_channel_data(sound_signals: np.array):
    """Select the last column of a two-dimensional signal array.

    :param sound_signals: array indexed as ``[sample, channel]``
    :return: the column at the last channel index
    """
    right_channel_index = -1
    return sound_signals[:, right_channel_index]


def get_sound_feature_vectors_from_file(file_path: str, normalize: bool = True, shift: bool = False):
    """Load a WAV file and return one feature vector per channel.

    :param file_path: path of the WAV file to read
    :param normalize: when true, the signals are passed through ``normalize_sound_signals``
    :param shift: when true and ``normalize`` is false, the signals are passed through
                  ``shift_sound_signals``; ignored whenever ``normalize`` is true
    :return: tuple ``(left_channel_feature_vector, right_channel_feature_vector)``
    """
    raw_signals = get_sound_signals(read_wav_file(file_path))

    # Normalization takes precedence; shifting applies only to un-normalized data
    if normalize:
        prepared_signals = normalize_sound_signals(raw_signals)
    elif shift:
        prepared_signals = shift_sound_signals(raw_signals)
    else:
        prepared_signals = raw_signals

    return get_left_channel_data(prepared_signals), get_right_channel_data(prepared_signals)


def stack_data(feature_matrix: np.array, left_channel_features: np.array, right_channel_features: np.array):
    """Append the two channel vectors as new rows of ``feature_matrix``.

    :param feature_matrix: rows collected so far; may be empty
    :param left_channel_features: feature vector of the left channel
    :param right_channel_features: feature vector of the right channel
    :return: a two-row array when ``feature_matrix`` is empty, otherwise
             ``feature_matrix`` with the two vectors stacked underneath
    """
    channel_rows = (left_channel_features, right_channel_features)
    if len(feature_matrix) != 0:
        return np.vstack((feature_matrix,) + channel_rows)
    # Nothing collected yet: the two channels form the initial matrix
    return np.stack(channel_rows)


def get_class_data(parent_directory_path: str, class_label: str, number_of_examples: int = 0,
                   normalize: bool = True, shift: bool = False):
    """Load the channel feature vectors of every WAV file belonging to one class.

    Each file contributes two rows (left and right channel), both labelled with
    ``class_label``.

    :param parent_directory_path: directory containing one sub-directory per class
    :param class_label: name of the class sub-directory; also used as the target value
    :param number_of_examples: maximum number of files to load; ``0`` loads all
                               files (the absolute value is used)
    :param normalize: forwarded to ``get_sound_feature_vectors_from_file``
    :param shift: forwarded to ``get_sound_feature_vectors_from_file``
    :return: tuple ``(class_feature_matrix, class_target_vector)`` as NumPy arrays;
             both are empty if the class directory does not exist
    """
    # Initializing the class feature matrix and target vector
    class_feature_matrix = []
    class_target_vector = []
    # Construct the path
    class_directory_path = construct_path(parent_directory_path, class_label)
    # Make sure the given path is correct
    if not exist_directory(class_directory_path):
        return np.array(class_feature_matrix), np.array(class_target_vector)

    # De-duplicate the file names, then sort them into a list: a set cannot be
    # sliced and iterates in arbitrary order, which would make both the subset
    # selection below and the row order of the result non-reproducible.
    wav_file_names = sorted(set(get_file_names(class_directory_path, '.wav')))
    # Keep only the first files if a limited number of training examples is wanted
    if number_of_examples:
        wav_file_names = wav_file_names[:abs(number_of_examples)]
    print(f'Processing: {len(wav_file_names)} files')
    # Iterate through each wav file
    for file_name in wav_file_names:
        file_path = construct_path(class_directory_path, file_name)
        left_channel_features, right_channel_features = \
            get_sound_feature_vectors_from_file(file_path, normalize, shift)
        class_feature_matrix.extend((left_channel_features, right_channel_features))
        class_target_vector.extend([class_label] * 2)

    return np.array(class_feature_matrix), np.array(class_target_vector)


def load_irmas_data(parent_directory_path: str, class_labels_to_process: List[str], 
                    number_of_training_examples_per_class: int = 0, normalize: bool = True,
                    shift: bool = False):
    """Load the feature matrix and target vector for a set of classes.

    :param parent_directory_path: directory containing one sub-directory per class
    :param class_labels_to_process: class labels to load; when empty, every
                                    sub-directory of ``parent_directory_path`` is used
    :param number_of_training_examples_per_class: forwarded to ``get_class_data``
                                                  as the per-class file limit
    :param normalize: forwarded to ``get_class_data``
    :param shift: forwarded to ``get_class_data``
    :return: tuple ``(feature_matrix, target_vector)``; both are empty arrays when
             the directory is invalid or no class yields any data
    """
    if not exist_directory(parent_directory_path):
        print(f'Invalid directory: {parent_directory_path}')
        # Nothing can be loaded from a missing directory, so stop here
        return np.array([]), np.array([])

    class_labels = class_labels_to_process if class_labels_to_process else get_subdirectory_names(parent_directory_path)

    # Collect the per-class arrays and concatenate once at the end, instead of
    # re-copying the growing matrix for every class
    class_feature_matrices = []
    class_target_vectors = []

    for class_label in class_labels:
        print(f'Getting Data from {class_label}')
        class_feature_matrix, class_target_vector = get_class_data(parent_directory_path, class_label,
                                                                   number_of_training_examples_per_class, normalize,
                                                                   shift)
        print('Loaded all the data from the class')
        # A missing or empty class yields empty arrays; stacking those onto a
        # non-empty matrix raises a dimension-mismatch error, so skip them
        if not class_feature_matrix.size:
            continue
        class_feature_matrices.append(class_feature_matrix)
        class_target_vectors.append(class_target_vector)

    if not class_feature_matrices:
        return np.array([]), np.array([])
    if len(class_feature_matrices) == 1:
        # A single class is returned as loaded, without re-stacking
        return class_feature_matrices[0], class_target_vectors[0]
    return np.vstack(class_feature_matrices), np.hstack(class_target_vectors)

In [10]:
# Load the piano ('pia') and guitar ('gac') classes as un-normalized, shifted
# samples, so the values can be used as indices by the Embedding layer below
X_train, y_train = load_irmas_data(
    '../../data/whole_dataset/training/',
    class_labels_to_process=['pia', 'gac'],
    normalize=False,
    shift=True,
)

Getting Data from pia
Processing: 721 files
Loaded all the data from the class
Getting Data from gac
Processing: 637 files
Loaded all the data from the class


In [14]:
# Summarize the loaded training data: sample count, sequence length, and value range
print(
    f'Total Number of Training Samples: {y_train.shape}',
    f'Total number of features for each sample: {X_train.shape[-1]}',
    f'Minimum Feature Value: {np.min(X_train)}, Maximum Feature Value: {np.max(X_train)}',
    sep='\n',
)

Total Number of Training Samples: (2716,)
Total number of features for each sample: 132299
Minimum Feature Value: 0, Maximum Feature Value: 65535


In [15]:
# Encode the string class labels as integer codes for the binary classifier
y_train_categorical = pd.Categorical(values=y_train)
y_train_numerical = y_train_categorical.codes

### Model Definition

This section will define the model architecture that will be used for the training purposes

In [20]:
# Defining the parameters for the Embedding layer
# Sequence length is taken from the loaded data instead of being hard-coded, so the
# model stays consistent if the clips or the loading options change
number_of_features = X_train.shape[-1]
embedding_vector_length = 10
# The Embedding input dimension must be one more than the largest index it receives;
# int() turns the NumPy scalar into a plain Python integer
highest_val = int(np.max(X_train)) + 1

In [21]:
# Embedding -> SimpleRNN -> single sigmoid unit for the binary decision
model = Sequential([
    Embedding(highest_val, embedding_vector_length,
              input_length=number_of_features),
    SimpleRNN(10),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 132299, 10)        655360    
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 10)                210       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 655,581
Trainable params: 655,581
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Train on the full training set for three epochs in mini-batches of 64 samples
model.fit(x=X_train, y=y_train_numerical, batch_size=64, epochs=3)

Epoch 1/3

KeyboardInterrupt: 

In [None]:
# Final evaluation of the model
# NOTE(review): X_test and y_test are not defined in any cell visible in this
# notebook -- confirm they are loaded (and y_test encoded like y_train_numerical)
# before running this cell.
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy_percent = scores[1]*100
print("Accuracy: %.2f%%" % accuracy_percent)