Skip to content

Commit

Permalink
First iteration of the Baseline notebook [issue:10]
Browse files Browse the repository at this point in the history
  • Loading branch information
simondurand123 committed Sep 18, 2018
1 parent dc2a71b commit 06a6dd4
Showing 1 changed file with 330 additions and 0 deletions.
330 changes: 330 additions & 0 deletions scripts/Baseline.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import json\n",
"import pandas as pd\n",
"import os\n",
"import helper_numpy\n",
"import featurefy\n",
"import sys\n",
"extra_path = os.path.join(os.path.split(os.getcwd())[0], 'openmic')\n",
"if extra_path not in sys.path:\n",
" sys.path.append(extra_path)\n",
"from util import filebase\n",
"from __future__ import print_function\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.pylab as pylab\n",
"from sklearn.externals import joblib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Let's create a folder to store de baseline models\n",
"!mkdir baseline-models"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Here are the path in the TGZ that are used to create the dataset input/output\n",
"tgz_path = '/path/to/tgz-openmic-2018/'\n",
"csvfile = tgz_path + 'sparse-labels.csv'\n",
"vggishpath = tgz_path + 'vggish/'\n",
"outfile = tgz_path + 'openmic-2018.npz'\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting the vggish features...\n",
"Extracting the labels information...\n",
"Saving the NPZ file...\n",
"Done.\n"
]
},
{
"data": {
"text/plain": [
"[True]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Helper function to create the dataset\n",
"helper_numpy.main(csvfile, vggishpath, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Let's load the dataset\n",
"vggish_data = np.load(outfile)\n",
"X = vggish_data['X']\n",
"Y_true = vggish_data['Y_true']\n",
"Y_mask = vggish_data['Y_mask']\n",
"sample_key = vggish_data['sample_key']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Let's split the data into the training and test set\n",
"split_test = pd.read_csv('split01_test.csv', header=None)\n",
"split_train = pd.read_csv('split01_train.csv', header=None)\n",
"\n",
"in_train = set()\n",
"for sk in split_train[0]:\n",
" if sk not in in_train:\n",
" in_train.add(sk)\n",
"\n",
"train_map = list()\n",
"test_map = list()\n",
"count = 0\n",
"for sk in sample_key:\n",
" if sk in in_train:\n",
" train_map.append(count)\n",
" else:\n",
" test_map.append(count)\n",
" count += 1\n",
"\n",
"X_test = X[test_map,:,:]\n",
"X_train = X[train_map,:,:]\n",
"Y_mask_test = Y_mask[test_map,:]\n",
"Y_mask_train = Y_mask[train_map,:]\n",
"Y_true_test = Y_true[test_map,:]\n",
"Y_true_train = Y_true[train_map,:]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"score on accordion is 0.917155903457 on the training set and 0.815985130112 on the test set.\n",
"score on banjo is 0.930459770115 on the training set and 0.728033472803 on the test set.\n",
"score on bass is 0.931929824561 on the training set and 0.749460043197 on the test set.\n",
"score on cello is 0.937158469945 on the training set and 0.680412371134 on the test set.\n",
"score on clarinet is 0.908309455587 on the training set and 0.7625 on the test set.\n",
"score on cymbals is 0.930715935335 on the training set and 0.889908256881 on the test set.\n",
"score on drums is 0.957671957672 on the training set and 0.858490566038 on the test set.\n",
"score on flute is 0.92049934297 on the training set and 0.690391459075 on the test set.\n",
"score on guitar is 0.980230642504 on the training set and 0.850917431193 on the test set.\n",
"score on mallet_percussion is 0.941842900302 on the training set and 0.690376569038 on the test set.\n",
"score on mandolin is 0.892759934676 on the training set and 0.693779904306 on the test set.\n",
"score on organ is 0.934886908842 on the training set and 0.747099767981 on the test set.\n",
"score on piano is 0.990038314176 on the training set and 0.893975903614 on the test set.\n",
"score on saxophone is 0.939516129032 on the training set and 0.755166931638 on the test set.\n",
"score on synthesizer is 0.978723404255 on the training set and 0.860526315789 on the test set.\n",
"score on trombone is 0.924509803922 on the training set and 0.734722222222 on the test set.\n",
"score on trumpet is 0.92491787893 on the training set and 0.695541401274 on the test set.\n",
"score on ukulele is 0.916076294278 on the training set and 0.718644067797 on the test set.\n",
"score on violin is 0.940798858773 on the training set and 0.770206022187 on the test set.\n",
"score on voice is 0.957142857143 on the training set and 0.842245989305 on the test set.\n"
]
}
],
"source": [
"# Let's focus on one class for illustration purposes. Eg: Drums\n",
"\n",
"with open(tgz_path + 'class-map.json', 'r') as f:\n",
" class_map = json.load(f)\n",
"\n",
"for instrument in class_map.keys():\n",
" inst_num = class_map[instrument]\n",
" X_train_inst = X_train[Y_mask_train[:, inst_num], :, :]\n",
" Y_true_train_inst = Y_true_train[Y_mask_train[:, inst_num], inst_num]\n",
"\n",
" # Let's arange the data for a sklearn Random Forest model \n",
" X_train_inst_sklearn = np.concatenate((np.std(X_train_inst, axis=1), np.std(X_train_inst, axis=1)), axis=1)\n",
" X_train_inst_sklearn = np.nan_to_num(X_train_inst_sklearn)\n",
" clf = RandomForestClassifier(max_depth=8, random_state=0)\n",
" clf.fit(X_train_inst_sklearn, Y_true_train_inst.round())\n",
" filename = os.getcwd() + '/baseline-models/clf_joblib_' + instrument + '.sav'\n",
" joblib.dump(clf, filename)\n",
"\n",
" # Let's evaluate the model on the test set\n",
" X_test_inst = X_test[Y_mask_test[:, inst_num], :, :]\n",
" Y_true_test_inst = Y_true_test[Y_mask_test[:, inst_num], inst_num]\n",
" \n",
" clf = joblib.load(filename)\n",
" X_test_inst_sklearn = np.concatenate((np.std(X_test_inst, axis=1), np.std(X_test_inst, axis=1)), axis=1)\n",
" X_test_inst_sklearn = np.nan_to_num(X_test_inst_sklearn)\n",
" print('score on', instrument, 'is', np.mean(clf.predict(X_train_inst_sklearn) == Y_true_train_inst.round()), 'on the training set and', np.mean(clf.predict(X_test_inst_sklearn) == Y_true_test_inst.round()), 'on the test set.')\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Let's now use the model end-to-end on new audio\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Restoring parameters from /Users/durand/miniconda2/envs/py36/lib/python3.6/site-packages/openmic/vggish/__model__/vggish_model.ckpt\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:02<00:00, 2.61s/it]\n"
]
},
{
"data": {
"text/plain": [
"[True]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first go from audio to VGGish\n",
"outpath = os.path.split(os.getcwd())[0] + '/tests/data/'\n",
"file_in = [os.path.split(os.getcwd())[0] + '/tests/data/audio/000046_3840.ogg']\n",
"featurefy.main(file_in, outpath)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# second go from VGGish to numpy format\n",
"file_out = os.path.join(outpath,\n",
" os.path.extsep.join([filebase(str(file_in)), 'npz']))\n",
"vggish_new = np.load(file_out)\n",
"time_len, _ = np.shape(vggish_new['features_z'])\n",
"input_num = int(time_len / 10)\n",
"X_new = np.empty([input_num, 10, 128], dtype=int)\n",
"for ii in range(input_num):\n",
" X_new[ii, :, :] = vggish_new['features_z'][ii * 10:(ii+1) * 10, :]\n",
"X_new_sklearn = np.concatenate((np.std(X_new, axis=1), np.std(X_new, axis=1)), axis=1)\n",
"X_new_sklearn = np.nan_to_num(X_new_sklearn)\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of accordion is: 0.230364370527\n",
"Probability of banjo is: 0.143900834708\n",
"Probability of bass is: 0.103372949379\n",
"Probability of cello is: 0.29027357582\n",
"Probability of clarinet is: 0.0591828023427\n",
"Probability of cymbals is: 0.605636873256\n",
"Probability of drums is: 0.918562169246\n",
"Probability of flute is: 0.256657592737\n",
"Probability of guitar is: 0.449598069877\n",
"Probability of mallet_percussion is: 0.153748873522\n",
"Probability of mandolin is: 0.356934285826\n",
"Probability of organ is: 0.0778685334558\n",
"Probability of piano is: 0.478437470879\n",
"Probability of saxophone is: 0.501320175057\n",
"Probability of synthesizer is: 0.482374188669\n",
"Probability of trombone is: 0.193985798423\n",
"Probability of trumpet is: 0.474401512417\n",
"Probability of ukulele is: 0.253232329041\n",
"Probability of violin is: 0.568555897323\n",
"Probability of voice is: 0.965989973694\n"
]
}
],
"source": [
"# finally, apply the classifier\n",
"for instrument in class_map.keys():\n",
" filename = os.getcwd() + '/baseline-models/clf_joblib_' + instrument + '.sav'\n",
" clf = joblib.load(filename)\n",
" print('Probability of', instrument, 'is:', np.median(clf.predict_proba(X_new_sklearn)[:,1]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 06a6dd4

Please sign in to comment.