Merge pull request #38 from bmcfee/conv-shapes

Conv shapes
bmcfee · Mar 5, 2017 · commit 0537b7a
2 parents: 0de182c + 32b5d0c
Show file tree

Hide file tree

Showing 7 changed files with 206 additions and 107 deletions.
diff --git a/pumpp/feature/base.py b/pumpp/feature/base.py
@@ -6,6 +6,7 @@
 import librosa
 
 from ..base import Scope
+from ..exceptions import ParameterError
 
 
 class FeatureExtractor(Scope):
@@ -21,13 +22,48 @@ class FeatureExtractor(Scope):
 
     hop_length : int > 0
         The hop length between analysis frames
+
+    conv : {'tf', 'th', None}
+        convolution dimension ordering:
+
+            - 'tf' for tensorflow-style 2D convolution
+            - 'th' for theano-style 2D convolution
+            - None for 1D or non-convolutional representations
     '''
-    def __init__(self, name, sr, hop_length):
+    def __init__(self, name, sr, hop_length, conv=None):
 
         super(FeatureExtractor, self).__init__(name)
 
+        if conv not in ('tf', 'th', None):
+            raise ParameterError('conv="{}", must be one of '
+                                 '("tf", "th", None)'.format(conv))
+
         self.sr = sr
         self.hop_length = hop_length
+        self.conv = conv
+
+    def register(self, key, dimension, dtype):
+
+        shape = [None, dimension]
+
+        if self.conv == 'tf':
+            shape.append(1)
+
+        elif self.conv == 'th':
+            shape.insert(0, 1)
+
+        super(FeatureExtractor, self).register(key, shape, dtype)
+
+    @property
+    def idx(self):
+        if self.conv is None:
+            return Ellipsis
+
+        elif self.conv == 'tf':
+            return (slice(None), slice(None), np.newaxis)
+
+        elif self.conv == 'th':
+            return (np.newaxis, slice(None), slice(None))
 
     def transform(self, y, sr):
         '''Transform an audio signal
@@ -57,30 +93,52 @@ def transform(self, y, sr):
     def transform_audio(self, y):
         raise NotImplementedError
 
+    def phase_diff(self, phase):
+        '''Compute the phase differential along a given axis
 
-def phase_diff(phase, axis=0):
-    '''Compute the phase differential along a given axis
+        Parameters
+        ----------
+        phase : np.ndarray
+            Input phase (in radians)
 
-    Parameters
-    ----------
-    phase : np.ndarray
-        Input phase (in radians)
+        Returns
+        -------
+        dphase : np.ndarray like `phase`
+            The phase differential.
+        '''
 
-    axis : int
-        The axis along which to differentiate
+        if self.conv is None:
+            axis = 0
+        elif self.conv == 'tf':
+            axis = 0
+        elif self.conv == 'th':
+            axis = 1
+
+        # Compute the phase differential
+        dphase = np.empty(phase.shape, dtype=phase.dtype)
+        zero_idx = [slice(None)] * phase.ndim
+        zero_idx[axis] = slice(1)
+        else_idx = [slice(None)] * phase.ndim
+        else_idx[axis] = slice(1, None)
+        dphase[zero_idx] = phase[zero_idx]
+        dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
+        return dphase
+
+    def layers(self):
+        '''Construct Keras input layers for the given transformer
 
-    Returns
-    -------
-    dphase : np.ndarray like `phase`
-        The phase differential.
-    '''
+        Returns
+        -------
+        layers : {field: keras.layers.Input}
+            A dictionary of keras input layers, keyed by the corresponding
+            field keys.
+        '''
+        from keras.layers import Input
+
+        L = dict()
+        for key in self.fields:
+            L[key] = Input(name=key,
+                           shape=self.fields[key].shape,
+                           dtype=self.fields[key].dtype)
 
-    # Compute the phase differential
-    dphase = np.empty(phase.shape, dtype=phase.dtype)
-    zero_idx = [slice(None)] * phase.ndim
-    zero_idx[axis] = slice(1)
-    else_idx = [slice(None)] * phase.ndim
-    else_idx[axis] = slice(1, None)
-    dphase[zero_idx] = phase[zero_idx]
-    dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
-    return dphase
+        return L
diff --git a/pumpp/feature/cqt.py b/pumpp/feature/cqt.py
@@ -4,7 +4,7 @@
 import numpy as np
 from librosa import cqt, magphase, note_to_hz
 
-from .base import FeatureExtractor, phase_diff
+from .base import FeatureExtractor
 
 __all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']
 
@@ -33,8 +33,8 @@ class CQT(FeatureExtractor):
         The minimum frequency of the CQT
     '''
     def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
-                 fmin=None):
-        super(CQT, self).__init__(name, sr, hop_length)
+                 fmin=None, conv=None):
+        super(CQT, self).__init__(name, sr, hop_length, conv=conv)
 
         if fmin is None:
             fmin = note_to_hz('C1')
@@ -44,8 +44,8 @@ def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
         self.fmin = fmin
 
         n_bins = n_octaves * 12 * over_sample
-        self.register('mag', [None, n_bins], np.float32)
-        self.register('phase', [None, n_bins], np.float32)
+        self.register('mag', n_bins, np.float32)
+        self.register('phase', n_bins, np.float32)
 
     def transform_audio(self, y):
         '''Compute the CQT
@@ -72,8 +72,8 @@ def transform_audio(self, y):
                                            self.over_sample * 12),
                                    bins_per_octave=(self.over_sample * 12)))
 
-        return {'mag': cqtm.T.astype(np.float32),
-                'phase': np.angle(phase).T.astype(np.float32)}
+        return {'mag': cqtm.T.astype(np.float32)[self.idx],
+                'phase': np.angle(phase).T.astype(np.float32)[self.idx]}
 
 
 class CQTMag(CQT):
@@ -117,7 +117,10 @@ class CQTPhaseDiff(CQT):
     def __init__(self, *args, **kwargs):
         super(CQTPhaseDiff, self).__init__(*args, **kwargs)
         phase_field = self.pop('phase')
-        self.register('dphase', phase_field.shape, phase_field.dtype)
+
+        self.register('dphase',
+                      self.n_octaves * 12 * self.over_sample,
+                      phase_field.dtype)
 
     def transform_audio(self, y):
         '''Compute the CQT with unwrapped phase
@@ -137,5 +140,5 @@ def transform_audio(self, y):
                 Unwrapped phase differential
         '''
         data = super(CQTPhaseDiff, self).transform_audio(y)
-        data['dphase'] = phase_diff(data.pop('phase'), axis=0)
+        data['dphase'] = self.phase_diff(data.pop('phase'))
         return data
diff --git a/pumpp/feature/fft.py b/pumpp/feature/fft.py
@@ -4,7 +4,7 @@
 import numpy as np
 import librosa
 
-from .base import FeatureExtractor, phase_diff
+from .base import FeatureExtractor
 
 __all__ = ['STFT', 'STFTMag', 'STFTPhaseDiff']
 
@@ -32,13 +32,13 @@ class STFT(FeatureExtractor):
     STFTMag
     STFTPhaseDiff
     '''
-    def __init__(self, name, sr, hop_length, n_fft):
-        super(STFT, self).__init__(name, sr, hop_length)
+    def __init__(self, name, sr, hop_length, n_fft, conv=None):
+        super(STFT, self).__init__(name, sr, hop_length, conv=conv)
 
         self.n_fft = n_fft
 
-        self.register('mag', [None, 1 + n_fft // 2], np.float32)
-        self.register('phase', [None, 1 + n_fft // 2], np.float32)
+        self.register('mag', 1 + n_fft // 2, np.float32)
+        self.register('phase', 1 + n_fft // 2, np.float32)
 
     def transform_audio(self, y):
         '''Compute the STFT magnitude and phase.
@@ -61,7 +61,8 @@ def transform_audio(self, y):
                                                    hop_length=self.hop_length,
                                                    n_fft=self.n_fft,
                                                    dtype=np.float32))
-        return {'mag': mag.T, 'phase': np.angle(phase.T)}
+        return {'mag': mag.T[self.idx],
+                'phase': np.angle(phase.T)[self.idx]}
 
 
 class STFTPhaseDiff(STFT):
@@ -74,7 +75,7 @@ class STFTPhaseDiff(STFT):
     def __init__(self, *args, **kwargs):
         super(STFTPhaseDiff, self).__init__(*args, **kwargs)
         phase_field = self.pop('phase')
-        self.register('dphase', phase_field.shape, phase_field.dtype)
+        self.register('dphase', 1 + self.n_fft // 2, phase_field.dtype)
 
     def transform_audio(self, y):
         '''Compute the STFT with phase differentials.
@@ -94,7 +95,7 @@ def transform_audio(self, y):
                 The unwrapped phase differential
         '''
         data = super(STFTPhaseDiff, self).transform_audio(y)
-        data['dphase'] = phase_diff(data.pop('phase'), axis=0)
+        data['dphase'] = self.phase_diff(data.pop('phase'))
         return data
 
 

diff --git a/pumpp/feature/mel.py b/pumpp/feature/mel.py
@@ -33,14 +33,15 @@ class Mel(FeatureExtractor):
         The maximum frequency bin.
         Defaults to `0.5 * sr`
     '''
-    def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):
-        super(Mel, self).__init__(name, sr, hop_length)
+    def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None,
+                 conv=None):
+        super(Mel, self).__init__(name, sr, hop_length, conv=conv)
 
         self.n_fft = n_fft
         self.n_mels = n_mels
         self.fmax = fmax
 
-        self.register('mag', [None, n_mels], np.float32)
+        self.register('mag', n_mels, np.float32)
 
     def transform_audio(self, y):
         '''Compute the Mel spectrogram
@@ -62,4 +63,4 @@ def transform_audio(self, y):
                                      n_mels=self.n_mels,
                                      fmax=self.fmax)).astype(np.float32)
 
-        return {'mag': mel.T}
+        return {'mag': mel.T[self.idx]}
diff --git a/pumpp/feature/rhythm.py b/pumpp/feature/rhythm.py
@@ -27,12 +27,12 @@ class Tempogram(FeatureExtractor):
     win_length : int > 0
         The length of the analysis window (in frames)
     '''
-    def __init__(self, name, sr, hop_length, win_length):
-        super(Tempogram, self).__init__(name, sr, hop_length)
+    def __init__(self, name, sr, hop_length, win_length, conv=None):
+        super(Tempogram, self).__init__(name, sr, hop_length, conv=conv)
 
         self.win_length = win_length
 
-        self.register('tempogram', [None, win_length], np.float32)
+        self.register('tempogram', win_length, np.float32)
 
     def transform_audio(self, y):
         '''Compute the tempogram
@@ -52,7 +52,7 @@ def transform_audio(self, y):
                           hop_length=self.hop_length,
                           win_length=self.win_length).astype(np.float32)
 
-        return {'tempogram': tgram.T}
+        return {'tempogram': tgram.T[self.idx]}
 
 
 class TempoScale(Tempogram):
@@ -77,12 +77,13 @@ class TempoScale(Tempogram):
     n_fmt : int > 0
         Number of scale coefficients to retain
     '''
-    def __init__(self, name, sr, hop_length, win_length, n_fmt=128):
-        super(TempoScale, self).__init__(name, sr, hop_length, win_length)
+    def __init__(self, name, sr, hop_length, win_length, n_fmt=128, conv=None):
+        super(TempoScale, self).__init__(name, sr, hop_length, win_length,
+                                         conv=conv)
 
         self.n_fmt = n_fmt
         self.pop('tempogram')
-        self.register('temposcale', [None, 1 + n_fmt // 2], np.float32)
+        self.register('temposcale', 1 + n_fmt // 2, np.float32)
 
     def transform_audio(self, y):
         '''Apply the scale transform to the tempogram
@@ -101,5 +102,5 @@ def transform_audio(self, y):
         data = super(TempoScale, self).transform_audio(y)
         data['temposcale'] = np.abs(fmt(data.pop('tempogram'),
                                         axis=1,
-                                        n_fmt=self.n_fmt)).astype(np.float32)
+                                        n_fmt=self.n_fmt)).astype(np.float32)[self.idx]
         return data
diff --git a/setup.py b/setup.py
@@ -34,6 +34,7 @@
                       'mir_eval>=0.4'],
     extras_require={
         'docs': ['numpydoc'],
-        'tests': ['pytest', 'pytest-cov']
+        'tests': ['pytest', 'pytest-cov', 'keras', 'tensorflow'],
+        'keras': ['keras'],
     }
 )