Merge pull request #59 from bmcfee/hcqt

fixed #19, implemented hcqt
bmcfee · Apr 4, 2017 · 0b3529b · 0b3529b
2 parents 6be75b4 + 36e5846
commit 0b3529b
Show file tree

Hide file tree

Showing 4 changed files with 360 additions and 12 deletions.
diff --git a/pumpp/feature/__init__.py b/pumpp/feature/__init__.py
@@ -11,6 +11,9 @@
     CQT
     CQTMag
     CQTPhaseDiff
+    HCQT
+    HCQTMag
+    HCQTPhaseDiff
     STFT
     STFTMag
     STFTPhaseDiff

diff --git a/pumpp/feature/base.py b/pumpp/feature/base.py
@@ -44,15 +44,15 @@ def __init__(self, name, sr, hop_length, conv=None):
         self.hop_length = hop_length
         self.conv = conv
 
-    def register(self, key, dimension, dtype):
+    def register(self, key, dimension, dtype, channels=1):
 
         shape = [None, dimension]
 
         if self.conv in ('channels_last', 'tf'):
-            shape.append(1)
+            shape.append(channels)
 
         elif self.conv in ('channels_first', 'th'):
-            shape.insert(0, 1)
+            shape.insert(0, channels)
 
         super(FeatureExtractor, self).register(key, shape, dtype)
 

diff --git a/pumpp/feature/cqt.py b/pumpp/feature/cqt.py
@@ -5,8 +5,10 @@
 from librosa import cqt, magphase, note_to_hz, amplitude_to_db
 
 from .base import FeatureExtractor
+from ..exceptions import ParameterError
 
-__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']
+__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff',
+           'HCQT', 'HCQTMag', 'HCQTPhaseDiff']
 
 
 class CQT(FeatureExtractor):
@@ -151,3 +153,191 @@ def transform_audio(self, y):
         data = super(CQTPhaseDiff, self).transform_audio(y)
         data['dphase'] = self.phase_diff(data.pop('phase'))
         return data
+
+
+class HCQT(FeatureExtractor):
+    '''Harmonic Constant-Q transform
+
+    Attributes
+    ----------
+    name : str
+        The name for this feature extractor
+
+    sr : number > 0
+        The sampling rate of audio
+
+    hop_length : int > 0
+        The number of samples between CQT frames
+
+    n_octaves : int > 0
+        The number of octaves in the CQT
+
+    over_sample : int > 0
+        The amount of frequency oversampling (bins per semitone)
+
+    fmin : float > 0
+        The minimum frequency of the CQT
+
+    harmonics : list of int >= 1
+        The list of harmonics to compute
+
+    log : boolean
+        If `True`, scale the magnitude to decibels
+
+        Otherwise, use linear magnitude
+
+    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
+        convolution dimension ordering:
+
+            - 'channels_last' for tensorflow-style 2D convolution
+            - 'tf' equivalent to 'channels_last'
+            - 'channels_first' for theano-style 2D convolution
+            - 'th' equivalent to 'channels_first'
+
+    '''
+    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
+                 fmin=None, harmonics=None, log=False, conv='channels_last'):
+
+        if conv not in ('channels_last', 'tf', 'channels_first', 'th'):
+            raise ParameterError('Invalid conv={}'.format(conv))
+
+        super(HCQT, self).__init__(name, sr, hop_length, conv=conv)
+
+        if fmin is None:
+            fmin = note_to_hz('C1')
+
+        if harmonics is None:
+            harmonics = [1]
+        else:
+            harmonics = list(harmonics)
+            if not all(isinstance(_, int) and _ > 0 for _ in harmonics):
+                raise ParameterError('Invalid harmonics={}'.format(harmonics))
+
+        self.n_octaves = n_octaves
+        self.over_sample = over_sample
+        self.fmin = fmin
+        self.log = log
+        self.harmonics = harmonics
+
+        n_bins = n_octaves * 12 * over_sample
+        self.register('mag', n_bins, np.float32, channels=len(harmonics))
+        self.register('phase', n_bins, np.float32, channels=len(harmonics))
+
+    def transform_audio(self, y):
+        '''Compute the HCQT
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
+                The CQT magnitude
+
+            data['phase']: np.ndarray, shape = mag.shape
+                The CQT phase
+        '''
+        cqtm, phase = [], []
+
+        for h in self.harmonics:
+            C, P = magphase(cqt(y=y,
+                                sr=self.sr,
+                                hop_length=self.hop_length,
+                                fmin=self.fmin * h,
+                                n_bins=(self.n_octaves *
+                                        self.over_sample * 12),
+                                bins_per_octave=(self.over_sample * 12)))
+            if self.log:
+                C = amplitude_to_db(C, ref=np.max)
+            cqtm.append(C)
+            phase.append(P)
+
+        cqtm = np.asarray(cqtm).astype(np.float32)
+        phase = np.angle(np.asarray(phase)).astype(np.float32)
+
+        return {'mag': self._index(cqtm),
+                'phase': self._index(phase)}
+
+    def _index(self, value):
+        '''Rearrange a tensor according to the convolution mode
+
+        Input is assumed to be in (channels, bins, time) format.
+        '''
+
+        if self.conv in ('channels_last', 'tf'):
+            return np.transpose(value, (2, 1, 0))
+
+        else:  # self.conv in ('channels_first', 'th')
+            return np.transpose(value, (0, 2, 1))
+
+
+class HCQTMag(HCQT):
+    '''Magnitude HCQT
+
+    See Also
+    --------
+    HCQT
+    '''
+
+    def __init__(self, *args, **kwargs):
+        super(HCQTMag, self).__init__(*args, **kwargs)
+        self.pop('phase')
+
+    def transform_audio(self, y):
+        '''Compute HCQT magnitude.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            the audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
+                The CQT magnitude
+        '''
+        data = super(HCQTMag, self).transform_audio(y)
+        data.pop('phase')
+        return data
+
+
+class HCQTPhaseDiff(HCQT):
+    '''HCQT with unwrapped phase differentials
+
+    See Also
+    --------
+    HCQT
+    '''
+    def __init__(self, *args, **kwargs):
+        super(HCQTPhaseDiff, self).__init__(*args, **kwargs)
+        phase_field = self.pop('phase')
+
+        self.register('dphase',
+                      self.n_octaves * 12 * self.over_sample,
+                      phase_field.dtype,
+                      channels=len(self.harmonics))
+
+    def transform_audio(self, y):
+        '''Compute the HCQT with unwrapped phase
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
+                CQT magnitude
+
+            data['dphase'] : np.ndarray, shape=(n_frames, n_bins)
+                Unwrapped phase differential
+        '''
+        data = super(HCQTPhaseDiff, self).transform_audio(y)
+        data['dphase'] = self.phase_diff(data.pop('phase'))
+        return data