In [1]:
import numpy as np
import json
import pandas as pd
import os
import helper_numpy
import featurefy
import sys
extra_path = os.path.join(os.path.split(os.getcwd())[0], 'openmic')
if extra_path not in sys.path:
    sys.path.append(extra_path)
from util import filebase
from __future__ import print_function
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from sklearn.externals import joblib

In [2]:
# Let's create a folder to store the baseline models.
# Done in Python rather than with `!mkdir` so that re-running the cell is a
# silent no-op when the folder already exists, and so that it does not rely
# on a POSIX shell being available.
if not os.path.isdir('baseline-models'):
    os.makedirs('baseline-models')

In [3]:
# Paths inside the extracted TGZ that are used to build the dataset:
# the label CSV and VGGish directory (inputs) and the NPZ file (output).
# tgz_path must keep its trailing slash: the other paths are formed by
# plain string concatenation.
tgz_path = '/path/to/tgz-openmic-2018/'
csvfile, vggishpath, outfile = (
    tgz_path + leaf
    for leaf in ('sparse-labels.csv', 'vggish/', 'openmic-2018.npz')
)


In [4]:
# Build the dataset with the project helper. It is handed the label CSV, the
# VGGish directory and the target NPZ path; per the log printed below it
# extracts the VGGish features and the label information, then saves the NPZ.
helper_numpy.main(csvfile, vggishpath, outfile)

Extracting the vggish features...
Extracting the labels information...
Saving the NPZ file...
Done.


[True]

In [5]:
# Let's load the dataset written by the helper above.
# The archive is opened in a `with` block so that its file handle is released
# as soon as the four arrays have been read; the arrays themselves remain
# usable after the archive is closed.
with np.load(outfile) as vggish_data:
    X = vggish_data['X']
    Y_true = vggish_data['Y_true']
    Y_mask = vggish_data['Y_mask']
    sample_key = vggish_data['sample_key']

In [6]:
# Let's split the data into the training and test set.
# The split files have no header row; column 0 holds the sample keys.
split_test = pd.read_csv('split01_test.csv', header=None)
split_train = pd.read_csv('split01_train.csv', header=None)

# A set gives constant-time membership tests and already ignores duplicates.
in_train = set(split_train[0])

# Row indices into X / Y_true / Y_mask for each partition, in dataset order.
# NOTE(review): every sample that is not listed in the training split ends up
# in the test partition; `split_test` is loaded but never consulted. Confirm
# that this is intended.
train_map = [idx for idx, sk in enumerate(sample_key) if sk in in_train]
test_map = [idx for idx, sk in enumerate(sample_key) if sk not in in_train]

X_test = X[test_map, :, :]
X_train = X[train_map, :, :]
Y_mask_test = Y_mask[test_map, :]
Y_mask_train = Y_mask[train_map, :]
Y_true_test = Y_true[test_map, :]
Y_true_train = Y_true[train_map, :]

In [7]:
# Train, save and evaluate one baseline Random Forest per instrument class.

with open(tgz_path + 'class-map.json', 'r') as f:
    class_map = json.load(f)


def clip_features(clips):
    """Reduce axis 1 of `clips` to one feature row per entry of axis 0.

    The row is the standard deviation over axis 1 concatenated with itself,
    then passed through `np.nan_to_num`.

    NOTE(review): both halves are `np.std`, which looks like it was meant to
    be mean + std. Left unchanged here because the features built for new
    audio later in the notebook must match what the models are trained on.
    """
    spread = np.std(clips, axis=1)
    return np.nan_to_num(np.concatenate((spread, spread), axis=1))


for instrument, inst_num in class_map.items():
    # Rows selected by this instrument's column of the training mask.
    train_rows = Y_mask_train[:, inst_num]
    X_train_inst = X_train[train_rows, :, :]
    Y_true_train_inst = Y_true_train[train_rows, inst_num]

    # Fit on the summarised clips; the targets are the rounded label values.
    X_train_inst_sklearn = clip_features(X_train_inst)
    train_targets = Y_true_train_inst.round()
    clf = RandomForestClassifier(max_depth=8, random_state=0)
    clf.fit(X_train_inst_sklearn, train_targets)
    filename = os.getcwd() + '/baseline-models/clf_joblib_' + instrument + '.sav'
    joblib.dump(clf, filename)

    # Same selection and summarisation for the test split.
    test_rows = Y_mask_test[:, inst_num]
    X_test_inst = X_test[test_rows, :, :]
    Y_true_test_inst = Y_true_test[test_rows, inst_num]
    X_test_inst_sklearn = clip_features(X_test_inst)

    # Score the model as read back from disk, so the saved file is exercised too.
    clf = joblib.load(filename)
    train_acc = np.mean(clf.predict(X_train_inst_sklearn) == train_targets)
    test_acc = np.mean(clf.predict(X_test_inst_sklearn) == Y_true_test_inst.round())
    print('score on', instrument, 'is', train_acc, 'on the training set and', test_acc, 'on the test set.')


score on accordion is 0.917155903457 on the training set and 0.815985130112 on the test set.
score on banjo is 0.930459770115 on the training set and 0.728033472803 on the test set.
score on bass is 0.931929824561 on the training set and 0.749460043197 on the test set.
score on cello is 0.937158469945 on the training set and 0.680412371134 on the test set.
score on clarinet is 0.908309455587 on the training set and 0.7625 on the test set.
score on cymbals is 0.930715935335 on the training set and 0.889908256881 on the test set.
score on drums is 0.957671957672 on the training set and 0.858490566038 on the test set.
score on flute is 0.92049934297 on the training set and 0.690391459075 on the test set.
score on guitar is 0.980230642504 on the training set and 0.850917431193 on the test set.
score on mallet_percussion is 0.941842900302 on the training set and 0.690376569038 on the test set.
score on mandolin is 0.892759934676 on the training set and 0.693779904306 on the test set.
score 

In [8]:
# Let's now use the model end-to-end on new audio


In [9]:
# Step 1: go from audio to VGGish.
# Both the input audio and the output folder sit under tests/data/ in the
# parent of the current working directory.
parent_dir = os.path.dirname(os.getcwd())
outpath = parent_dir + '/tests/data/'
file_in = [parent_dir + '/tests/data/audio/000046_3840.ogg']
featurefy.main(file_in, outpath)

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:tensorflow:Restoring parameters from /Users/durand/miniconda2/envs/py36/lib/python3.6/site-packages/openmic/vggish/__model__/vggish_model.ckpt


100%|██████████| 1/1 [00:02<00:00,  2.61s/it]


[True]

In [10]:
# Step 2: go from VGGish to numpy format.
# The feature file is named after the input audio file. Pass the path itself
# (file_in[0]) to filebase rather than str(file_in): the repr of the list
# wraps the path in "['...']", so the result would depend on how filebase
# copes with the surrounding brackets and quotes.
file_out = os.path.join(outpath,
                        os.path.extsep.join([filebase(file_in[0]), 'npz']))

# Read the array once instead of indexing the archive on every loop
# iteration, and close the archive afterwards.
with np.load(file_out) as vggish_new:
    features_z = vggish_new['features_z']

# Cut the frame sequence into consecutive, non-overlapping groups of 10 rows;
# trailing rows that do not fill a whole group are dropped.
time_len, feat_dim = features_z.shape
input_num = time_len // 10
# The cast to int reproduces the earlier element-wise copy into an int array.
# NOTE(review): lossless only if features_z holds integer values; confirm.
X_new = features_z[:input_num * 10].reshape(input_num, 10, feat_dim).astype(int)

# NOTE(review): std is concatenated with itself, mirroring how the training
# features are built earlier in the notebook; the two must stay in sync.
X_new_sklearn = np.concatenate((np.std(X_new, axis=1), np.std(X_new, axis=1)), axis=1)
X_new_sklearn = np.nan_to_num(X_new_sklearn)


In [11]:
# Step 3: apply every saved per-instrument classifier to the new features.
# For each one, report the median over the rows of X_new_sklearn of column 1
# of predict_proba.
cwd = os.getcwd()
for instrument in class_map:
    filename = ''.join([cwd, '/baseline-models/clf_joblib_', instrument, '.sav'])
    clf = joblib.load(filename)
    positive_proba = clf.predict_proba(X_new_sklearn)[:, 1]
    print('Probability of', instrument, 'is:', np.median(positive_proba))

Probability of accordion is: 0.230364370527
Probability of banjo is: 0.143900834708
Probability of bass is: 0.103372949379
Probability of cello is: 0.29027357582
Probability of clarinet is: 0.0591828023427
Probability of cymbals is: 0.605636873256
Probability of drums is: 0.918562169246
Probability of flute is: 0.256657592737
Probability of guitar is: 0.449598069877
Probability of mallet_percussion is: 0.153748873522
Probability of mandolin is: 0.356934285826
Probability of organ is: 0.0778685334558
Probability of piano is: 0.478437470879
Probability of saxophone is: 0.501320175057
Probability of synthesizer is: 0.482374188669
Probability of trombone is: 0.193985798423
Probability of trumpet is: 0.474401512417
Probability of ukulele is: 0.253232329041
Probability of violin is: 0.568555897323
Probability of voice is: 0.965989973694
