In [5]:
import numpy as np
from scipy.io import wavfile
import librosa
import os
import sys
import pickle 
import numpy as np
from sklearn.model_selection import train_test_split
from collections import OrderedDict
import gc

training_data = 'IRMAS-TrainingData/'
testing_data = ['IRMAS-TestingData-Part1/Part1/', 'IRMAS-TestingData-Part2/IRTestingData-Part2/', 'IRMAS-TestingData-Part3/Part3/']
instrument_map = {
        "cel" : 0, 
        "cla" : 1, 
        "flu" : 2, 
        "gac" : 3, 
        "gel" : 4, 
        "org" : 5, 
        "pia" : 6, 
        "sax" : 7, 
        "tru" : 8, 
        "vio" : 9, 
        "voi" : 10
        }

In [6]:
def preprocess_audio(filename):
    """
    Performs audio processing and generates the mel-spectrograms for the wav file.
    
    Returns: mel spectrogram for the wav file.
    """ 
    fs, data = wavfile.read(filename)
    # Convert to a mono signal by taking the mean of the left and right channels
    audio = np.mean(data, axis=1)
    # Downsampling from 44KHz to 22KHz
    audio = audio[np.arange(0, audio.size, 2)]
    # Normalize the signal
    audio /= np.max(np.abs(audio))  

    # Computing Short Time Fourier Transform
    stft = np.abs(librosa.stft(audio, win_length=1024, hop_length=512,
        center=True))
    # Converting to Mel Spectogram
    mel_spec = librosa.feature.melspectrogram(S=stft, sr=22050, n_mels=128)

    # Segment the spectogram
    seg_dur = 43
    spec_list = []
    for idx in range(0, mel_spec.shape[1] - seg_dur + 1, seg_dur):
        spec_list.append(mel_spec[:, idx : (idx + seg_dur)])
    mspecs = np.expand_dims(np.array(spec_list), axis=1)
    
    return mspecs

def extract_features_for_audio(filename, testing, index):
    """ 
    For a given wav file, the features are of the following format:
    1. filename: the name of the audio file. 
    2. melspec: the mel-spectrogram of the wav file.
    3. labels: the instruments present in the wav file. These are extracted from the filename. 
    
    Arguments:
        filename: the name of the wav file for processing
        testing: whether the processing is done for testing dataset or not. Each file in testing dataset is annotated with multiple instruments
                whereas each file in the training dataset is annotated with single instrument.
    """
    features = {}
    features["filename"] = filename[:-4]
    features["melspec"] = preprocess_audio(filename)
    features["labels"] = np.zeros([11])
    
    if testing == False:
        features["labels"][index] = 1
    else:
        with open(filename[:-4] + '.txt', 'r') as fp:
            lines = fp.readlines()
            for instrument in lines:
                features["labels"][instrument_map[instrument[:3]]] = 1

    return features

def preprocess_training_data():
    """
    Preprocess the training data.
    
    Returns: the features for the training dataset.
    """ 
    features = []
    for instrument in instrument_map.keys() :

        for root, dirs, files in os.walk(training_data + instrument):
            total_files = len(files)
            print("Processing directory: ", root, "Total files: ", total_files)
            count = 0
            for file in files:
                if file.endswith('.wav'):
                    count += 1
                    feat = extract_features_for_audio(training_data+instrument+ "/" +
                            file,testing=False, index=instrument_map[instrument] )
                    features.append(feat)
                    
    return features

def preprocess_test_data():
    """
    Preprocess the testing data.
    
    Returns: the features for the testing dataset.
    """ 
    testing_data_features = []
    for folder in testing_data:
        print("Processing folder ", folder)

        features = []
        for root, dirs, files in os.walk(folder):
            total_files = len(files)/2
            print("Processing directory: ", root, ". Total Files: ", total_files)
            count = 0
            for file in files:
                if file.endswith('.wav'):
                    feature = extract_features_for_audio(folder+file, index=0, testing=True)
                    features.append(feature)

        testing_data_features.append(features)
    return np.concatenate(testing_data_features)

In [7]:
training_data = preprocess_training_data()

# Initialize the training feature and label matrices
X_train = np.zeros((20115, 128, 43))
y_train = np.zeros((20115, 11))

 # Fill the training data matrices 
j = 0
for idx, _ in enumerate(training_data):
    assert len(training_data[idx]['melspec']) == 3
    label = training_data[idx]["labels"]
    for feat in training_data[idx]['melspec']:
        X_train[j,:,:] = feat
        y_train[j,:] = label
        j+=1
        
X_train = np.asarray(np.split(X_train, np.arange(3, X_train.shape[0]-1, 3)))
y_train = np.asarray(np.split(y_train, np.arange(3, y_train.shape[0]-1, 3)))

for y in y_train:
    assert y.shape[0] == 3

#Shuffle the dataset for randomization
permutations = np.random.permutation(X_train.shape[0])
X_train = np.vstack(X_train[permutations])
y_train = np.vstack(y_train[permutations])

# Split the training dataset
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
        test_size=0.15, shuffle=False)
# Save Numpy arrays and dictionaries
np.save('X_train', X_train)
np.save('y_train', y_train)
np.save('X_val', X_val)
np.save('y_val', y_val)

# Deleting these might help in garbage collection and freeing up memory.
del X_train
del y_train
del X_val
del y_val
gc.collect()

Processing directory:  IRMAS-TrainingData/cel Total files:  388
Processing directory:  IRMAS-TrainingData/cla Total files:  505
Processing directory:  IRMAS-TrainingData/flu Total files:  451
Processing directory:  IRMAS-TrainingData/gac Total files:  637
Processing directory:  IRMAS-TrainingData/gel Total files:  760
Processing directory:  IRMAS-TrainingData/org Total files:  682
Processing directory:  IRMAS-TrainingData/pia Total files:  721
Processing directory:  IRMAS-TrainingData/sax Total files:  626
Processing directory:  IRMAS-TrainingData/tru Total files:  577
Processing directory:  IRMAS-TrainingData/vio Total files:  580
Processing directory:  IRMAS-TrainingData/voi Total files:  778


0

In [8]:
testing_data = preprocess_test_data()

# Initialize the testing feature and label dictionaries
X_test = OrderedDict()
y_test = OrderedDict()

# Store the number of audio fragments per testing file
# This will be used for aggrating classification on the test set
num_fragments_per_file = [len(testing_data[i]['melspec']) 
        for i, _ in enumerate(testing_data)]

# Fill the test data dictionaries
for idx, _ in enumerate(testing_data):
    # Initialize the feature and lable matrices for test file at index ix
    X_test_file_ix = np.zeros((num_fragments_per_file[idx], 128, 43))
    y_test_file_ix = np.zeros((num_fragments_per_file[idx], 11))

    label = testing_data[idx]["labels"]

    j = 0
    for feat in testing_data[idx]['melspec']:
        X_test_file_ix[j,:,:] = feat
        y_test_file_ix[j,:] = label
        j+=1

    X_test[idx] = X_test_file_ix
    y_test[idx] = y_test_file_ix

# Write the testing data and labels into pickle files for further training
f = open("X_test.pkl", "wb")
pickle.dump(X_test, f)
f.close()

f = open("y_test.pkl", "wb")
pickle.dump(y_test, f)
f.close()  
print("Data preprocessing successfully completed.")

Processing folder  IRMAS-TestingData-Part1/Part1/
Processing directory:  IRMAS-TestingData-Part1/Part1/ . Total Files:  807.0
Processing folder  IRMAS-TestingData-Part2/IRTestingData-Part2/
Processing directory:  IRMAS-TestingData-Part2/IRTestingData-Part2/ . Total Files:  1301.5
Processing folder  IRMAS-TestingData-Part3/Part3/
Processing directory:  IRMAS-TestingData-Part3/Part3/ . Total Files:  766.0
Data preprocessing successfully completed.


: 