Skip to content
Permalink
Browse files

Use AudioPreprocessor instead of preprocess_audio in service.py (#80)

- Fixes #75
  • Loading branch information
matyasfodor authored and daemon committed Mar 16, 2019
1 parent 2bb7b54 commit 14179c75611f140b11fc2b2f4c244ebdfc9e7444
Showing 3 changed files with 7 additions and 7 deletions.
  1. +5 −5 service.py
  2. +1 −1 utils/manage_audio.py
  3. +1 −1 utils/model.py
@@ -19,7 +19,7 @@
except ImportError:
pass

from utils.manage_audio import AudioSnippet, preprocess_audio
from utils.manage_audio import AudioSnippet, AudioPreprocessor
try:
import utils.model as model
except ImportError:
@@ -56,14 +56,14 @@ class Caffe2LabelService(LabelService):
def __init__(self, onnx_filename, labels):
self.labels = labels
self.model_filename = onnx_filename
self.filters = librosa.filters.dct(40, 40)
self.audio_processor = AudioPreprocessor()
self._graph = onnx.load(onnx_filename)
self._in_name = self._graph.graph.input[0].name
self.model = onnx_caffe2.backend.prepare(self._graph)

def label(self, wav_data):
wav_data = np.frombuffer(wav_data, dtype=np.int16) / 32768.
model_in = np.expand_dims(preprocess_audio(wav_data, 40, self.filters), 0)
model_in = np.expand_dims(self.audio_processor.compute_mfccs(wav_data).squeeze(2), 0)
model_in = np.expand_dims(model_in, 0)
model_in = model_in.astype(np.float32)
predictions = _softmax(self.model.run({self._in_name: model_in})[0])
@@ -74,7 +74,7 @@ def __init__(self, model_filename, no_cuda=False, labels=["_silence_", "_unknown
self.labels = labels
self.model_filename = model_filename
self.no_cuda = no_cuda
self.filters = librosa.filters.dct(40, 40)
self.audio_processor = AudioPreprocessor()
self.reload()

def reload(self):
@@ -96,7 +96,7 @@ def label(self, wav_data):
A (most likely label, probability) tuple
"""
wav_data = np.frombuffer(wav_data, dtype=np.int16) / 32768.
model_in = torch.from_numpy(preprocess_audio(wav_data, 40, self.filters)).unsqueeze(0)
model_in = torch.from_numpy(self.audio_processor.compute_mfccs(wav_data).squeeze(2)).unsqueeze(0)
model_in = torch.autograd.Variable(model_in, requires_grad=False)
if not self.no_cuda:
model_in = model_in.cuda()
@@ -38,7 +38,7 @@ def compute_mfccs(self, data):
fmax=self.f_max)
data[data > 0] = np.log(data[data > 0])
data = [np.matmul(self.dct_filters, x) for x in np.split(data, data.shape[1], axis=1)]
data = np.array(data, order="F").reshape(1, 101, 40).astype(np.float32)
data = np.array(data, order="F").astype(np.float32)
return data

def compute_pcen(self, data):
@@ -255,7 +255,7 @@ def collate_fn(self, data):
y = []
for audio_data, label in data:
if self.audio_preprocess_type == "MFCCs":
audio_tensor = torch.from_numpy(self.audio_processor.compute_mfccs(audio_data))
audio_tensor = torch.from_numpy(self.audio_processor.compute_mfccs(audio_data).reshape(1, 101, 40))
x = audio_tensor if x is None else torch.cat((x, audio_tensor), 0)
elif self.audio_preprocess_type == "PCEN":
audio_tensor = torch.from_numpy(np.expand_dims(audio_data, axis=0))

0 comments on commit 14179c7

Please sign in to comment.
You can’t perform that action at this time.