First iteration of the Baseline notebook [issue:10]

cosmir · Sep 18, 2018 · 06a6dd4 · 06a6dd4
1 parent dc2a71b
commit 06a6dd4
Showing 1 changed file with 330 additions and 0 deletions.
diff --git a/scripts/Baseline.ipynb b/scripts/Baseline.ipynb
@@ -0,0 +1,330 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import json\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "import helper_numpy\n",
+    "import featurefy\n",
+    "import sys\n",
+    "extra_path = os.path.join(os.path.split(os.getcwd())[0], 'openmic')\n",
+    "if extra_path not in sys.path:\n",
+    "    sys.path.append(extra_path)\n",
+    "from util import filebase\n",
+    "from __future__ import print_function\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.pylab as pylab\n",
+    "from sklearn.externals import joblib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the folder that stores the baseline models.\n",
+    "# Unlike a bare `mkdir`, this does nothing when the folder already exists,\n",
+    "# so the cell can be re-run without an error.\n",
+    "model_dir = os.path.join(os.getcwd(), 'baseline-models')\n",
+    "if not os.path.isdir(model_dir):\n",
+    "    os.makedirs(model_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Paths inside the TGZ folder that are used to create the dataset input/output.\n",
+    "# os.path.join is used so the derived paths do not depend on `tgz_path`\n",
+    "# ending with a path separator.\n",
+    "tgz_path = '/path/to/tgz-openmic-2018/'\n",
+    "csvfile = os.path.join(tgz_path, 'sparse-labels.csv')\n",
+    "vggishpath = os.path.join(tgz_path, 'vggish/')\n",
+    "outfile = os.path.join(tgz_path, 'openmic-2018.npz')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting the vggish features...\n",
+      "Extracting the labels information...\n",
+      "Saving the NPZ file...\n",
+      "Done.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[True]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build the dataset: the helper reads the labels CSV and the VGGish features\n",
+    "# and saves the result as an NPZ file at `outfile`\n",
+    "helper_numpy.main(csvfile, vggishpath, outfile)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the dataset arrays. The context manager closes the NPZ archive\n",
+    "# once the four arrays have been read into memory.\n",
+    "with np.load(outfile) as vggish_data:\n",
+    "    X = vggish_data['X']\n",
+    "    Y_true = vggish_data['Y_true']\n",
+    "    Y_mask = vggish_data['Y_mask']\n",
+    "    sample_key = vggish_data['sample_key']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Split the data into the training and test sets listed in the split CSV files\n",
+    "split_test = pd.read_csv('split01_test.csv', header=None)\n",
+    "split_train = pd.read_csv('split01_train.csv', header=None)\n",
+    "\n",
+    "# The first column of each split file holds the sample keys\n",
+    "in_train = set(split_train[0])\n",
+    "in_test = set(split_test[0])\n",
+    "\n",
+    "# Row indices (into X / Y_true / Y_mask) that belong to each split\n",
+    "train_map = list()\n",
+    "test_map = list()\n",
+    "for count, sk in enumerate(sample_key):\n",
+    "    if sk in in_train:\n",
+    "        train_map.append(count)\n",
+    "    elif sk in in_test:\n",
+    "        test_map.append(count)\n",
+    "    else:\n",
+    "        # A key listed in neither split must not be silently treated as test data\n",
+    "        raise RuntimeError('Unknown sample key={}! Abort!'.format(sk))\n",
+    "\n",
+    "X_test = X[test_map,:,:]\n",
+    "X_train = X[train_map,:,:]\n",
+    "Y_mask_test = Y_mask[test_map,:]\n",
+    "Y_mask_train = Y_mask[train_map,:]\n",
+    "Y_true_test = Y_true[test_map,:]\n",
+    "Y_true_train = Y_true[train_map,:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "score on accordion is 0.917155903457 on the training set and 0.815985130112 on the test set.\n",
+      "score on banjo is 0.930459770115 on the training set and 0.728033472803 on the test set.\n",
+      "score on bass is 0.931929824561 on the training set and 0.749460043197 on the test set.\n",
+      "score on cello is 0.937158469945 on the training set and 0.680412371134 on the test set.\n",
+      "score on clarinet is 0.908309455587 on the training set and 0.7625 on the test set.\n",
+      "score on cymbals is 0.930715935335 on the training set and 0.889908256881 on the test set.\n",
+      "score on drums is 0.957671957672 on the training set and 0.858490566038 on the test set.\n",
+      "score on flute is 0.92049934297 on the training set and 0.690391459075 on the test set.\n",
+      "score on guitar is 0.980230642504 on the training set and 0.850917431193 on the test set.\n",
+      "score on mallet_percussion is 0.941842900302 on the training set and 0.690376569038 on the test set.\n",
+      "score on mandolin is 0.892759934676 on the training set and 0.693779904306 on the test set.\n",
+      "score on organ is 0.934886908842 on the training set and 0.747099767981 on the test set.\n",
+      "score on piano is 0.990038314176 on the training set and 0.893975903614 on the test set.\n",
+      "score on saxophone is 0.939516129032 on the training set and 0.755166931638 on the test set.\n",
+      "score on synthesizer is 0.978723404255 on the training set and 0.860526315789 on the test set.\n",
+      "score on trombone is 0.924509803922 on the training set and 0.734722222222 on the test set.\n",
+      "score on trumpet is 0.92491787893 on the training set and 0.695541401274 on the test set.\n",
+      "score on ukulele is 0.916076294278 on the training set and 0.718644067797 on the test set.\n",
+      "score on violin is 0.940798858773 on the training set and 0.770206022187 on the test set.\n",
+      "score on voice is 0.957142857143 on the training set and 0.842245989305 on the test set.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's focus on one class for illustration purposes. Eg: Drums\n",
+    "\n",
+    "with open(tgz_path + 'class-map.json', 'r') as f:\n",
+    "    class_map = json.load(f)\n",
+    "\n",
+    "for instrument in class_map.keys():\n",
+    "    inst_num = class_map[instrument]\n",
+    "    X_train_inst = X_train[Y_mask_train[:, inst_num], :, :]\n",
+    "    Y_true_train_inst = Y_true_train[Y_mask_train[:, inst_num], inst_num]\n",
+    "\n",
+    "    # Let's arange the data for a sklearn Random Forest model \n",
+    "    X_train_inst_sklearn = np.concatenate((np.std(X_train_inst, axis=1), np.std(X_train_inst, axis=1)), axis=1)\n",
+    "    X_train_inst_sklearn = np.nan_to_num(X_train_inst_sklearn)\n",
+    "    clf = RandomForestClassifier(max_depth=8, random_state=0)\n",
+    "    clf.fit(X_train_inst_sklearn, Y_true_train_inst.round())\n",
+    "    filename = os.getcwd() + '/baseline-models/clf_joblib_' + instrument + '.sav'\n",
+    "    joblib.dump(clf, filename)\n",
+    "\n",
+    "    # Let's evaluate the model on the test set\n",
+    "    X_test_inst = X_test[Y_mask_test[:, inst_num], :, :]\n",
+    "    Y_true_test_inst = Y_true_test[Y_mask_test[:, inst_num], inst_num]\n",
+    "    \n",
+    "    clf = joblib.load(filename)\n",
+    "    X_test_inst_sklearn = np.concatenate((np.std(X_test_inst, axis=1), np.std(X_test_inst, axis=1)), axis=1)\n",
+    "    X_test_inst_sklearn = np.nan_to_num(X_test_inst_sklearn)\n",
+    "    print('score on', instrument, 'is', np.mean(clf.predict(X_train_inst_sklearn) == Y_true_train_inst.round()), 'on the training set and', np.mean(clf.predict(X_test_inst_sklearn) == Y_true_test_inst.round()), 'on the test set.')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's now use the model end-to-end on new audio\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  0%|          | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Restoring parameters from /Users/durand/miniconda2/envs/py36/lib/python3.6/site-packages/openmic/vggish/__model__/vggish_model.ckpt\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:02<00:00,  2.61s/it]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[True]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# first go from audio to VGGish\n",
+    "outpath = os.path.split(os.getcwd())[0] + '/tests/data/'\n",
+    "file_in = [os.path.split(os.getcwd())[0] + '/tests/data/audio/000046_3840.ogg']\n",
+    "featurefy.main(file_in, outpath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# second go from VGGish to numpy format\n",
+    "file_out = os.path.join(outpath,\n",
+    "                        os.path.extsep.join([filebase(str(file_in)), 'npz']))\n",
+    "vggish_new = np.load(file_out)\n",
+    "time_len, _ = np.shape(vggish_new['features_z'])\n",
+    "input_num = int(time_len / 10)\n",
+    "X_new = np.empty([input_num, 10, 128], dtype=int)\n",
+    "for ii in range(input_num):\n",
+    "    X_new[ii, :, :] = vggish_new['features_z'][ii * 10:(ii+1) * 10, :]\n",
+    "X_new_sklearn = np.concatenate((np.std(X_new, axis=1), np.std(X_new, axis=1)), axis=1)\n",
+    "X_new_sklearn = np.nan_to_num(X_new_sklearn)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Probability of accordion is: 0.230364370527\n",
+      "Probability of banjo is: 0.143900834708\n",
+      "Probability of bass is: 0.103372949379\n",
+      "Probability of cello is: 0.29027357582\n",
+      "Probability of clarinet is: 0.0591828023427\n",
+      "Probability of cymbals is: 0.605636873256\n",
+      "Probability of drums is: 0.918562169246\n",
+      "Probability of flute is: 0.256657592737\n",
+      "Probability of guitar is: 0.449598069877\n",
+      "Probability of mallet_percussion is: 0.153748873522\n",
+      "Probability of mandolin is: 0.356934285826\n",
+      "Probability of organ is: 0.0778685334558\n",
+      "Probability of piano is: 0.478437470879\n",
+      "Probability of saxophone is: 0.501320175057\n",
+      "Probability of synthesizer is: 0.482374188669\n",
+      "Probability of trombone is: 0.193985798423\n",
+      "Probability of trumpet is: 0.474401512417\n",
+      "Probability of ukulele is: 0.253232329041\n",
+      "Probability of violin is: 0.568555897323\n",
+      "Probability of voice is: 0.965989973694\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Finally, reload each saved classifier and apply it to the new audio features\n",
+    "for instrument in class_map:\n",
+    "    filename = os.getcwd() + '/baseline-models/clf_joblib_' + instrument + '.sav'\n",
+    "    clf = joblib.load(filename)\n",
+    "    # Column 1 of predict_proba for every row of X_new_sklearn,\n",
+    "    # reported as the median over those rows\n",
+    "    proba = clf.predict_proba(X_new_sklearn)[:, 1]\n",
+    "    print('Probability of', instrument, 'is:', np.median(proba))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}