# Instrument Classification - Part 1 : Pre-Processing

We start our first trial by trying to recognise piano (pia) and acoustic guitar (gac).

In [10]:
import numpy as np
import librosa
import os
import math
from random import shuffle
from collections import deque
from pydub import AudioSegment
from pydub.utils import make_chunks

In [70]:
# IRMAS training folders keyed by instrument code (pia = piano,
# gac = acoustic guitar). Insertion order fixes the class order used for
# both `instr` and `dir`.
_irmas_dirs = {
    'pia': './Instrument-Classification/IRMAS-TrainingData/pia/',
    'gac': './Instrument-Classification/IRMAS-TrainingData/gac/',
    'sax': './Instrument-Classification/IRMAS-TrainingData/sax/',
    'voi': './Instrument-Classification/IRMAS-TrainingData/voi/',
}

dirpia = _irmas_dirs['pia']
dirgac = _irmas_dirs['gac']
dirsax = _irmas_dirs['sax']
dirvoi = _irmas_dirs['voi']

# Instrument codes and their folders, in matching order.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# later cells pass it to split_train_test.
instr = [*_irmas_dirs]
dir = [*_irmas_dirs.values()]

In [159]:
def split_train_test(dir_array):
    """Collect every entry of the given directories and split them 90/10.

    Args:
        dir_array: iterable of directory paths, with or without a trailing
            path separator.

    Returns:
        A ``(train_list, test_list)`` tuple of path lists. The combined
        listing is shuffled in place with ``random.shuffle`` before the
        split, so the result differs between calls unless the ``random``
        module is seeded.
    """
    paths = []
    for directory in dir_array:
        # os.path.join only inserts a separator when the directory does not
        # already end with one, so both 'x/pia' and 'x/pia/' yield valid paths.
        paths.extend(os.path.join(directory, name)
                     for name in os.listdir(directory))
    shuffle(paths)
    # 90-10 split
    split_index = math.floor(len(paths) * 0.90)
    return (paths[:split_index], paths[split_index:])

def _mel_snippets(path):
    """Load one audio file and return its dB-scaled mel-spectrogram snippets."""
    # downsampled to 22050Hz and downmixed to mono
    wav, sr = librosa.load(path, mono=True, sr=22050)
    # normalized by the root mean square energy; a silent or empty signal is
    # left untouched, since dividing by a zero RMS would fill it with NaNs
    rms = np.sqrt(np.mean(wav**2))
    if rms > 0:
        wav = wav / rms
    snippets = []
    # chunked to 1sec long snippets: make_chunks is handed the raw sample
    # array, so the chunk length `sr` is counted in samples, not milliseconds.
    # NOTE(review): a trailing chunk shorter than one second would produce a
    # spectrogram with fewer frames than the others - confirm that all input
    # files are a whole number of seconds long.
    for chunk in make_chunks(wav, sr):
        # transformed into mel-spectrograms with given traits; the signal is
        # passed as `y=` because newer librosa releases no longer accept it
        # positionally
        mel = librosa.feature.melspectrogram(y=chunk, sr=sr, n_mels=96,
                                             fmax=11025, n_fft=1024,
                                             hop_length=256, power=1)
        # decibel scaling
        snippets.append(librosa.core.amplitude_to_db(mel))
    return snippets


def _featurize(paths):
    """Return (features, labels) for all snippets of all files in *paths*."""
    features = []
    labels = []
    for path in paths:
        snippets = _mel_snippets(path)
        if not snippets:
            continue
        # the label is the name of the directory that contains the file
        label = path.split('/')[-2]
        features.extend(snippets)
        labels.extend([label] * len(snippets))
    return features, labels


def pre_processing(trainList, testList):
    """Turn lists of audio file paths into labelled mel-spectrogram snippets.

    Each file is loaded as mono at 22050 Hz, RMS-normalised, cut into chunks
    of ``sr`` samples, and every chunk is converted to a 96-band
    mel-spectrogram (n_fft=1024, hop_length=256, power=1, fmax=11025) that is
    then scaled to decibels.

    Args:
        trainList: paths of the training files; the second-to-last
            '/'-separated component of each path is used as its label.
        testList: paths of the test files, labelled the same way.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` - four lists, where the
        ``x_*`` lists hold one spectrogram per snippet and the ``y_*`` lists
        hold the matching label strings.
    """
    # The two lists are processed separately, so a test list that happens to
    # compare equal to the train list is still routed to the test outputs.
    x_train, y_train = _featurize(trainList)
    x_test, y_test = _featurize(testList)
    return (x_train, y_train, x_test, y_test)

def to_categorical(list, classes=None):
    """Encode label entries as multi-hot rows over the instrument classes.

    Args:
        list: sequence of label entries. Each entry is anything supporting
            ``in`` - typically a list of instrument codes, or a single code
            string (for a string the test is a substring match). The
            parameter name shadows the builtin and is kept only for backward
            compatibility.
        classes: ordered class codes defining the columns; defaults to the
            module-level ``instr``.

    Returns:
        A list with one row per entry; ``row[k]`` is 1 when ``classes[k]`` is
        contained in that entry and 0 otherwise. Every row is an independent
        list object.
    """
    if classes is None:
        classes = instr
    return [[1 if code in entry else 0 for code in classes] for entry in list]

In [43]:
# Split the file paths, then turn every file into labelled spectrogram snippets.
trainList, testList = split_train_test(dir)
x_train, y_train, x_test, y_test = pre_processing(trainList, testList)

# Labels become multi-hot arrays; features gain a trailing channel axis.
y_train, y_test = [np.array(to_categorical(labels))
                   for labels in (y_train, y_test)]
x_train, x_test = [np.expand_dims(np.array(feats), -1)
                   for feats in (x_train, x_test)]

# Shuffle each set, applying the same permutation to features and labels.
id1 = np.random.permutation(len(x_train))
id2 = np.random.permutation(len(x_test))
x_train = x_train[id1]
y_train = y_train[id1]
x_test = x_test[id2]
y_test = y_test[id2]

print("Train Size:", x_train.shape)
print("Test  Size:", x_test.shape)

Train Size: (3666, 96, 87, 1)
Test  Size: (408, 96, 87, 1)


In [44]:
# Persist the four arrays as .npy files in the current working directory.
for _fname, _arr in (
    ('x_train2.npy', x_train),
    ('x_test2.npy', x_test),
    ('y_train2.npy', y_train),
    ('y_test2.npy', y_test),
):
    np.save(_fname, _arr)

In [160]:
# Quick check of to_categorical on multi-label entries. The sample is bound to
# its own name so the builtin `list` is not overwritten for the rest of the
# session.
sample_labels = [['sax'],['gac','sax','voi'],['pia','gac'],['pia'],['voi','gac']]
print(to_categorical(sample_labels))

[[0, 0, 1, 0], [0, 1, 1, 1], [1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 1]]
