# Instrument Classification

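We start our first trial by trying to recognise piano (pia), acoustic guitar (gac), saxophone (sax) and voice (voi). In the code below only piano and acoustic guitar are enabled; the saxophone and voice classes are commented out.  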

In [1]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import os
import math
from random import shuffle
import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,Flatten,Activation,BatchNormalization

Using TensorFlow backend.


In [2]:
def split_train_test(dir_array, train_fraction=0.90, seed=None):
    """Collect every file in the given directories and split them into train/test.

    Parameters
    ----------
    dir_array : iterable of str
        Directories whose entries (as returned by ``os.listdir``) are pooled
        together before splitting.
    train_fraction : float, optional
        Share of the pooled files that goes into the training list.  The split
        index is ``floor(n_files * train_fraction)``.  Defaults to 0.90.
    seed : int or None, optional
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        same seed always yields the same split.  When ``None`` (default) the
        module-level ``shuffle`` is used, exactly as before.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(train_list, test_list)`` holding the file paths.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError("train_fraction must be between 0 and 1, got %r" % (train_fraction,))

    paths = []
    for directory in dir_array:
        # os.listdir returns entries in arbitrary order; sorting makes a seeded
        # shuffle reproducible across machines.
        for filename in sorted(os.listdir(directory)):
            # os.path.join works whether or not `directory` ends with a slash.
            paths.append(os.path.join(directory, filename))

    if seed is None:
        shuffle(paths)
    else:
        from random import Random
        Random(seed).shuffle(paths)

    split_index = math.floor(len(paths) * train_fraction)
    return (paths[:split_index], paths[split_index:])

def pre_processing(trainList,testList):
    """Turn two lists of audio file paths into mel spectrograms and labels.

    Every file is loaded as mono audio with ``librosa.load``, scaled to unit
    RMS, and converted to a mel spectrogram with 96 mel bands
    (``n_fft=1024``, ``hop_length=256``).  The label of a file is the name of
    the directory that directly contains it (e.g. ``.../pia/x.wav`` -> ``'pia'``).

    Parameters
    ----------
    trainList : list of str
        Paths of the training files.
    testList : list of str
        Paths of the test files.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)`` where the ``x_*`` entries are
        lists of spectrogram arrays and the ``y_*`` entries are lists of label
        strings, in the same order as the input paths.
    """
    def _extract(paths):
        # Load, normalise and featurise each file of one split.
        features = []
        labels = []
        for path in paths:
            wav, sr = librosa.load(path, mono=True)
            rms = np.sqrt(np.mean(wav**2))
            # A silent clip has zero RMS; dividing would fill it with NaN/inf,
            # so normalisation is skipped in that case.
            if rms > 0:
                wav = wav / rms
            # Keyword arguments: recent librosa releases no longer accept the
            # audio signal positionally.
            mel = librosa.feature.melspectrogram(y=wav, sr=sr, n_mels=96, n_fft=1024, hop_length=256)
            features.append(mel)
            # Use the parent directory name rather than a fixed index into the
            # split path, so the label does not depend on how deep the dataset
            # sits in the file system.
            labels.append(os.path.basename(os.path.dirname(path)))
        return features, labels

    x_train, y_train = _extract(trainList)
    x_test, y_test = _extract(testList)
    return (x_train,y_train,x_test,y_test)

# One-hot layout with the default classes: [pia, gac]
def to_categorical(list, classes=('pia', 'gac')):
    """One-hot encode a sequence of instrument label strings.

    The input is left untouched and a new list is returned, so the function
    can be called repeatedly on the same labels (e.g. when a notebook cell is
    re-run).

    Parameters
    ----------
    list : sequence of str
        Labels to encode.  (The name shadows the builtin; it is kept so that
        existing keyword callers keep working.)
    classes : sequence of str, optional
        Class names in output-column order.  Defaults to ``('pia', 'gac')``,
        giving ``'pia' -> [1, 0]`` and ``'gac' -> [0, 1]``.

    Returns
    -------
    list of list of int
        One row per input label, each of length ``len(classes)``.

    Raises
    ------
    ValueError
        If a label is not one of ``classes``.  Leaving such a label in the
        output would produce a ragged array that cannot be used for training.
    """
    class_names = tuple(classes)
    encoded = []
    for label in list:
        try:
            position = class_names.index(label)
        except ValueError:
            raise ValueError("unknown label %r; expected one of %r" % (label, class_names))
        row = [0] * len(class_names)
        row[position] = 1
        encoded.append(row)
    return encoded

In [3]:
# Feature extraction over the whole dataset takes roughly 10 minutes.
dirpia = './Desktop/Instrument-Classification/IRMAS-TrainingData/pia/'
dirgac = './Desktop/Instrument-Classification/IRMAS-TrainingData/gac/'
# Saxophone and voice are not part of this two-class trial:
#dirsax = './Desktop/Instrument-Classification/IRMAS-TrainingData/sax/'
#dirvoi = './Desktop/Instrument-Classification/IRMAS-TrainingData/voi/'

# Shuffle-split the file paths, then load the audio and compute the features.
trainList, testList = split_train_test([dirpia, dirgac])
x_train, y_train, x_test, y_test = pre_processing(trainList, testList)

# Stack the spectrograms and append a trailing singleton axis; the resulting
# per-sample shape is what the model cell passes as `input_shape`.
x_train_new = np.array(x_train)[..., np.newaxis]
x_test_new = np.array(x_test)[..., np.newaxis]

# One-hot encode the string labels.
y_train_new = np.array(to_categorical(y_train))
y_test_new = np.array(to_categorical(y_test))

print("Train Size: ", x_train_new.shape)
print("Test Size:  ", x_test_new.shape)

Train Size:  (2485, 96, 259, 1)
Test Size:   (277, 96, 259, 1)


In [6]:
# The output layer must have one unit per one-hot label column.  The labels
# built above are 2-wide (piano / acoustic guitar), so a fixed Dense(4) would
# not match them; the width is derived from the labels instead.
num_classes = y_train_new.shape[1]

# (filters, pool_size) for each convolutional block.  Every block is
# Conv2D(3x3) -> ELU -> BatchNorm -> MaxPool -> Dropout(0.1).
conv_blocks = [
    (64, (2,2)),
    (128, (2,2)),
    (256, (3,3)),
    (640, (3,3)),
]

model = Sequential()
for block_index, (filters, pool_size) in enumerate(conv_blocks):
    if block_index == 0:
        # Only the first layer needs to be told the per-sample input shape.
        model.add(Conv2D(filters,kernel_size=(3,3),input_shape = x_train_new[0].shape))
    else:
        model.add(Conv2D(filters,kernel_size=(3,3)))
    model.add(Activation('elu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.1))

# Classifier head.
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# Softmax turns the outputs into class probabilities, which is what the
# 'categorical_crossentropy' loss used at compile time expects.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 94, 257, 64)       640       
_________________________________________________________________
activation_6 (Activation)    (None, 94, 257, 64)       0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 94, 257, 64)       256       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 47, 128, 64)       0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 47, 128, 64)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 45, 126, 128)      73856     
_________________________________________________________________
activation_7 (Activation)    (None, 45, 126, 128)      0         
__________

In [7]:
# Alternative optimizer kept for reference:
#sgd = keras.optimizers.SGD(lr=0.0001, momentum=0.9)
model.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
# The held-out test split doubles as validation data, so the 'val_*' curves
# below are the test-set curves.
history = model.fit(x_train_new,y_train_new,epochs=15,batch_size=32,validation_data=(x_test_new,y_test_new))

# Older Keras releases log the accuracy metric as 'acc'/'val_acc', newer ones
# as 'accuracy'/'val_accuracy'; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

plt.plot(history.history[acc_key], label='Train Accuracy')
plt.plot(history.history['val_' + acc_key], label='Test Accuracy')
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Accuracy-Loss')
plt.legend()

Train on 2485 samples, validate on 277 samples
Epoch 1/15
Epoch 2/15

KeyboardInterrupt: 