
Sound Classifier: Allow Reusing Deep Features (#1657)

* Sound Classifier: Allow Reusing Deep Features

- Add tc.sound_classifier.get_deep_features.
- Change sound classifier (create & predict) to use get_deep_features.
- Allow sound classifier (create & predict) to use deep features directly (see the usage sketch after this list).

- Use SArrayBuilder to build the deep features.
- Logging: print header info only when printing progress info.
- Any function which could trigger deep feature extraction now takes a `verbose` flag.
- Move parameter checking into predict, rather than duplicating it less thoroughly in the places that just call predict.
- classify/evaluate/predict_topk now accept an SFrame or dict in addition to an SArray.

- Unit tests: test deep feature reuse.
- Unit tests: copy data rather than regenerating it each time.
- Unit tests: make a problematic test less sensitive (six decimal places is fine).
- Unit tests: test whether the validation set actually gets used.

- Took a first stab at the user guide section for this feature; more will be added later (#1649).
- Fixed a finicky unit test.
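
A minimal usage sketch of the new workflow (the audio path and the label-derivation step are hypothetical; the API calls are the ones described above):

>>> import turicreate as tc
>>> data = tc.audio_analysis.load_audio('~/Documents/myAudioFiles/', recursive=True)
>>> data['labels'] = data['path'].apply(lambda p: p.split('/')[-2])  # hypothetical labeling scheme
>>> # Extract the VGGish deep features once, up front.
>>> data['deep_features'] = tc.sound_classifier.get_deep_features(data['audio'])
>>> train, test = data.random_split(0.8)
>>> # create() sees that the feature column already holds deep features,
>>> # so it skips re-extracting them.
>>> model = tc.sound_classifier.create(train, 'labels', feature='deep_features')
>>> # predict/classify/evaluate/predict_topk accept deep features directly too.
>>> predictions = model.predict(test['deep_features'])
>>> metrics = model.evaluate(test)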
TobyRoseman committed Mar 25, 2019
1 parent 6942101 commit e9b7ac6cad5a28894815aa4c5c9282163b97775b
@@ -11,6 +11,7 @@

from .util import TempDirectory

from copy import copy
import math
from os import mkdir
import unittest
@@ -145,11 +146,14 @@ def generate_sine_wave(length, sample_rate):
return data


binary_test_data = _generate_binary_test_data()


class ClassifierTestTwoClassesStringLabels(unittest.TestCase):

@classmethod
def setUpClass(self):
self.data = _generate_binary_test_data()
self.data = copy(binary_test_data)
self.is_binary_classification = True
self.model = tc.sound_classifier.create(self.data, 'labels', feature='audio')

@@ -213,7 +217,7 @@ def test_save_and_load(self):
old_model_probs = self.model.predict(self.data['audio'], output_type='probability_vector')
new_model_probs = new_model.predict(self.data['audio'], output_type='probability_vector')
for a, b in zip(old_model_probs, new_model_probs):
self.assertItemsEqual(a, b)
np.testing.assert_array_almost_equal(a, b, decimal=6)

@unittest.skipIf(_mac_ver() < (10,14), 'Custom models only supported on macOS 10.14+')
def test_export_coreml_with_prediction(self):
@@ -281,14 +285,16 @@ def test_predict_topk(self):
self.assertTrue(len(unique_ranks) == 1)
self.assertTrue(unique_ranks[0] == 0)

def test_validation_set(self):
self.assertTrue(self.model.validation_accuracy is None)


class ClassifierTestTwoClassesIntLabels(ClassifierTestTwoClassesStringLabels):
@classmethod
def setUpClass(self):
self.data = _generate_binary_test_data()
self.data = copy(binary_test_data)
self.data['labels'] = self.data['labels'].apply(lambda x: 0 if x == 'white noise' else 1)
self.is_binary_classification = True

self.model = tc.sound_classifier.create(self.data, 'labels', feature='audio', validation_set=None)


@@ -303,11 +309,14 @@ def generate_constant_noise(length, sample_rate):
generate_constant_noise(1, 17000)]
constant_noise = tc.SFrame({'audio': constant_noise,
'labels': ['constant noise'] * len(constant_noise)})
self.data = _generate_binary_test_data().append(constant_noise)
self.data = copy(binary_test_data).append(constant_noise)

self.is_binary_classification = False
self.model = tc.sound_classifier.create(self.data, 'labels', feature='audio', validation_set=self.data)

def test_validation_set(self):
self.assertTrue(self.model.validation_accuracy is not None)


@unittest.skipIf(_mac_ver() < (10,14), 'Custom models only supported on macOS 10.14+')
class CoreMlCustomModelPreprocessingTest(unittest.TestCase):
@@ -347,3 +356,54 @@ def test_case(self):

self.assertEqual(y2.shape, (1,96,64))
self.assertTrue(np.isclose(y1, y2, atol=1e-04).all())


class ReuseDeepFeatures(unittest.TestCase):
def test_simple_case(self):
data = copy(binary_test_data)
deep_features = tc.sound_classifier.get_deep_features(data['audio'])

# Verify deep features are in the correct format
self.assertTrue(isinstance(deep_features, tc.SArray))
self.assertEqual(len(data), len(deep_features))
self.assertEqual(deep_features.dtype, list)
self.assertEqual(len(deep_features[0]), 3)
self.assertTrue(isinstance(deep_features[0][0], np.ndarray))
self.assertEqual(deep_features[0][0].dtype, np.float64)
self.assertEqual(len(deep_features[0][0]), 12288)

# Test helper methods
self.assertTrue(tc.sound_classifier._is_audio_data_sarray(data['audio']))
self.assertTrue(tc.sound_classifier._is_deep_feature_sarray(deep_features))

original_audio_data = data['audio']
del data['audio']

# Create a model using the deep features
data['features'] = deep_features
model = tc.sound_classifier.create(data, 'labels', feature='features')

# Test predict
predictions_from_audio = model.predict(original_audio_data, output_type='probability_vector')
predictions_from_deep_features = model.predict(deep_features, output_type='probability_vector')
for a, b in zip(predictions_from_audio, predictions_from_deep_features):
self.assertAlmostEqual(a, b)

# Test classify
predictions_from_audio = model.classify(original_audio_data)
predictions_from_deep_features = model.classify(deep_features)
for a, b in zip(predictions_from_audio, predictions_from_deep_features):
self.assertEqual(a, b)

# Test predict_topk
predictions_from_audio = model.predict_topk(original_audio_data, k=2)
predictions_from_deep_features = model.predict_topk(deep_features, k=2)
for a, b in zip(predictions_from_audio, predictions_from_deep_features):
self.assertEqual(a, b)

# Test evaluate
predictions_from_audio = model.evaluate(tc.SFrame({'features': original_audio_data,
'labels': data['labels']}))
predictions_from_deep_features = model.evaluate(tc.SFrame({'features': deep_features,
'labels': data['labels']}))
self.assertEqual(predictions_from_audio['f1_score'], predictions_from_deep_features['f1_score'])
@@ -57,7 +57,7 @@ def load_audio(path, with_path=True, recursive=True, ignore_failure=True, random
Examples
--------
>>> audio_path = "~/Documents/myAudioFiles/"
>>> audio_sframe = tc.audio_analysis.load_audio(audio_analysis, recursive=True)
>>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
"""
all_wav_files = []

@@ -9,4 +9,4 @@
from __future__ import print_function as _
from __future__ import division as _

from .sound_classifier import *
from .sound_classifier import create, get_deep_features, SoundClassifier
@@ -3,6 +3,8 @@
from coremltools.models import MLModel
import mxnet as mx
from mxnet.gluon import nn, utils
import numpy as _np
import turicreate as _tc

from .._internal_utils import _mac_ver
from .. import _mxnet_utils
@@ -31,37 +33,43 @@ def __repr__(self):


class VGGishFeatureExtractor(object):
name = 'VGGish'
output_length = 12288

@staticmethod
def preprocess_data(audio_data, labels, verbose=True):
def _preprocess_data(audio_data, verbose=True):
'''
Preprocess each example, breaking it up into frames.
Returns two numpy arrays: preprocessed frame and their labels.
Returns preprocessed frames (a numpy array) and a list of their source-example indexes
'''
from .vggish_input import waveform_to_examples
import numpy as np

last_progress_update = _time.time()
progress_header_printed = False

# Can't run as a ".apply(...)" due to numba.jit decorator issue:
# https://github.com/apple/turicreate/issues/1216
preprocessed_data, output_labels = [], []
preprocessed_data, audio_data_index = [], []
for i, audio_dict in enumerate(audio_data):
scaled_data = audio_dict['data'] / 32768.0
data = waveform_to_examples(scaled_data, audio_dict['sample_rate'])

for j in data:
preprocessed_data.append([j])
output_labels.append(labels[i])
audio_data_index.append(i)

# If `verbose` is set, print a progress update about every 20s
if verbose and _time.time() - last_progress_update >= 20:
if not progress_header_printed:
print("Preprocessing audio data -")
progress_header_printed = True
print("Preprocessed {} of {} examples".format(i, len(audio_data)))
last_progress_update = _time.time()

return np.asarray(preprocessed_data), np.asarray(output_labels)
if progress_header_printed:
print("Preprocessed {} of {} examples\n".format(len(audio_data), len(audio_data)))
return _np.asarray(preprocessed_data), audio_data_index

@staticmethod
def _build_net():
@@ -100,7 +108,7 @@ def __init__(self):
model_path = vggish_model_file.get_model_path(format='coreml')
self.vggish_model = MLModel(model_path)

def extract_features(self, preprocessed_data, verbose=True):
def _extract_features(self, preprocessed_data, verbose=True):
"""
Parameters
----------
@@ -110,9 +118,10 @@ def extract_features(self, preprocessed_data, verbose=True):
-------
numpy array containing the deep features
"""
import numpy as np

last_progress_update = _time.time()
progress_header_printed = False

deep_features = _tc.SArrayBuilder(_np.ndarray)
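# Features are appended to the builder incrementally; close() materializes the SArray.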

if _mac_ver() < (10, 14):
# Use MXNet
@@ -123,20 +132,23 @@ def extract_features(self, preprocessed_data, verbose=True):
ctx_list = ctx_list[:len(preprocessed_data)]
batches = utils.split_and_load(preprocessed_data, ctx_list=ctx_list, even_split=False)

deep_features = []
for i, cur_batch in enumerate(batches):
y = self.vggish_model.forward(cur_batch).asnumpy()
for j in y:
deep_features.append(j)

# If `verbose` is set, print a progress update about every 20s
if verbose and _time.time() - last_progress_update >= 20:
if not progress_header_printed:
print("Extracting deep features -")
progress_header_printed = True
print("Extracted {} of {} batches".format(i, len(batches)))
last_progress_update = _time.time()
if progress_header_printed:
print("Extracted {} of {} batches\n".format(len(batches), len(batches)))

else:
# Use Core ML
deep_features = []
for i, cur_example in enumerate(preprocessed_data):
for cur_frame in cur_example:
x = {'input1': [cur_frame]}
@@ -145,11 +157,28 @@ def extract_features(self, preprocessed_data, verbose=True):

# If `verbose` is set, print a progress update about every 20s
if verbose and _time.time() - last_progress_update >= 20:
if not progress_header_printed:
print("Extracting deep features -")
progress_header_printed = True
print("Extracted {} of {}".format(i, len(preprocessed_data)))
last_progress_update = _time.time()
if progress_header_printed:
print("Extracted {} of {}\n".format(len(preprocessed_data), len(preprocessed_data)))

return np.asarray(deep_features)
return deep_features.close()

def get_deep_features(self, audio_data, verbose):
'''
Performs both audio preprocessing and VGGish deep feature extraction.
'''
preprocessed_data, row_ids = self._preprocess_data(audio_data, verbose)
deep_features = self._extract_features(preprocessed_data, verbose)

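# Frames were extracted per example; regroup them into one list per audio
# example (unstack's output column here is 'List of deep features'),
# then restore the original row order.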
output = _tc.SFrame({'deep features': deep_features, 'row id': row_ids})
output = output.unstack('deep features')
output = output.sort('row id')
return output['List of deep features']

def get_spec(self):
"""
Return the Core ML spec