Skip to content

Commit

Permalink
Merge pull request #59 from bmcfee/hcqt
Browse files Browse the repository at this point in the history
fixed #19, implemented hcqt
  • Loading branch information
bmcfee committed Apr 4, 2017
2 parents 6be75b4 + 36e5846 commit 0b3529b
Show file tree
Hide file tree
Showing 4 changed files with 360 additions and 12 deletions.
3 changes: 3 additions & 0 deletions pumpp/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
CQT
CQTMag
CQTPhaseDiff
HCQT
HCQTMag
HCQTPhaseDiff
STFT
STFTMag
STFTPhaseDiff
Expand Down
6 changes: 3 additions & 3 deletions pumpp/feature/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def __init__(self, name, sr, hop_length, conv=None):
self.hop_length = hop_length
self.conv = conv

def register(self, key, dimension, dtype, channels=1):
    '''Register a feature field, adding a channel axis for conv layouts.

    The base shape is ``[None, dimension]``.  Depending on ``self.conv``
    a channel axis of size ``channels`` is attached:

    - 'channels_last' / 'tf'  : ``[None, dimension, channels]``
    - 'channels_first' / 'th' : ``[channels, None, dimension]``
    - anything else           : shape is left as ``[None, dimension]``
      and ``channels`` is not used

    Parameters
    ----------
    key
        Name of the field being registered
    dimension
        Size of the feature axis
    dtype
        Data type of the field
    channels : int, optional
        Size of the channel axis (default 1, i.e. a single channel)
    '''
    shape = [None, dimension]

    if self.conv in ('channels_last', 'tf'):
        shape.append(channels)

    elif self.conv in ('channels_first', 'th'):
        shape.insert(0, channels)

    # The parent class performs the actual registration
    super(FeatureExtractor, self).register(key, shape, dtype)

Expand Down
192 changes: 191 additions & 1 deletion pumpp/feature/cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from librosa import cqt, magphase, note_to_hz, amplitude_to_db

from .base import FeatureExtractor
from ..exceptions import ParameterError

__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']
__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff',
'HCQT', 'HCQTMag', 'HCQTPhaseDiff']


class CQT(FeatureExtractor):
Expand Down Expand Up @@ -151,3 +153,191 @@ def transform_audio(self, y):
data = super(CQTPhaseDiff, self).transform_audio(y)
data['dphase'] = self.phase_diff(data.pop('phase'))
return data


class HCQT(FeatureExtractor):
    '''Harmonic Constant-Q transform

    One constant-Q transform is computed per requested harmonic ``h``,
    each starting at ``fmin * h``; the results are stacked along a
    channel axis.

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.
        Defaults to the frequency of note C1.

    harmonics : non-empty list of int >= 1
        The list of harmonics to compute.
        Defaults to ``[1]`` (the fundamental only).

    log : boolean
        If `True`, scale the magnitude to decibels
        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first'}
        convolution dimension ordering:

            - 'channels_last' for tensorflow-style 2D convolution
            - 'tf' equivalent to 'channels_last'
            - 'channels_first' for theano-style 2D convolution
            - 'th' equivalent to 'channels_first'

    Raises
    ------
    ParameterError
        If `conv` is not one of the supported orderings, or if
        `harmonics` is empty or contains anything other than
        positive integers.
    '''
    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
                 fmin=None, harmonics=None, log=False, conv='channels_last'):

        # The harmonic axis is stored as the channel axis, so a conv
        # layout is mandatory here.
        if conv not in ('channels_last', 'tf', 'channels_first', 'th'):
            raise ParameterError('Invalid conv={}'.format(conv))

        super(HCQT, self).__init__(name, sr, hop_length, conv=conv)

        if fmin is None:
            fmin = note_to_hz('C1')

        if harmonics is None:
            harmonics = [1]
        else:
            harmonics = list(harmonics)
            # An empty list would register zero channels and only fail
            # later inside transform_audio; reject it up front.
            # numpy integers (e.g. from np.arange) are accepted too.
            if not harmonics or not all(isinstance(h, (int, np.integer))
                                        and h > 0 for h in harmonics):
                raise ParameterError('Invalid harmonics={}'.format(harmonics))

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log
        self.harmonics = harmonics

        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, np.float32, channels=len(harmonics))
        self.register('phase', n_bins, np.float32, channels=len(harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, dtype float32
                The CQT magnitude of each harmonic.
                Shape is (n_frames, n_bins, n_harmonics) for
                'channels_last' / 'tf', and
                (n_harmonics, n_frames, n_bins) for
                'channels_first' / 'th'.

            data['phase'] : np.ndarray, shape = mag.shape, dtype float32
                The CQT phase angle, as returned by `np.angle`
        '''
        # These do not depend on the harmonic; compute them once.
        bins_per_octave = self.over_sample * 12
        n_bins = self.n_octaves * bins_per_octave

        cqtm, phase = [], []

        for h in self.harmonics:
            C, P = magphase(cqt(y=y,
                                sr=self.sr,
                                hop_length=self.hop_length,
                                fmin=self.fmin * h,
                                n_bins=n_bins,
                                bins_per_octave=bins_per_octave))
            if self.log:
                # Each harmonic is referenced to its own peak magnitude
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        # Stack to (n_harmonics, n_bins, n_frames)
        cqtm = np.asarray(cqtm).astype(np.float32)
        phase = np.angle(np.asarray(phase)).astype(np.float32)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)}

    def _index(self, value):
        '''Rearrange a tensor according to the convolution mode

        Input is assumed to be in (channels, bins, time) format.

        Returns
        -------
        np.ndarray
            (time, bins, channels) for 'channels_last' / 'tf';
            (channels, time, bins) otherwise.
        '''

        if self.conv in ('channels_last', 'tf'):
            return np.transpose(value, (2, 1, 0))

        else:  # self.conv in ('channels_first', 'th')
            return np.transpose(value, (0, 2, 1))


class HCQTMag(HCQT):
    '''Magnitude-only HCQT

    Behaves like `HCQT`, except that the 'phase' field is neither
    registered nor returned.

    See Also
    --------
    HCQT
    '''

    def __init__(self, *args, **kwargs):
        super(HCQTMag, self).__init__(*args, **kwargs)
        # The parent registers both 'mag' and 'phase'; drop the latter.
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute HCQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray
                The CQT magnitude, laid out exactly as produced by
                `HCQT.transform_audio`
        '''
        features = super(HCQTMag, self).transform_audio(y)
        del features['phase']
        return features


class HCQTPhaseDiff(HCQT):
    '''HCQT with unwrapped phase differentials

    The 'phase' field of `HCQT` is replaced by a 'dphase' field with
    the same dimension, dtype and channel count.

    See Also
    --------
    HCQT
    '''
    def __init__(self, *args, **kwargs):
        super(HCQTPhaseDiff, self).__init__(*args, **kwargs)

        # Swap the registered 'phase' field for 'dphase', reusing its dtype
        replaced = self.pop('phase')
        n_bins = self.n_octaves * 12 * self.over_sample
        n_channels = len(self.harmonics)

        self.register('dphase', n_bins, replaced.dtype, channels=n_channels)

    def transform_audio(self, y):
        '''Compute the HCQT with unwrapped phase

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray
                CQT magnitude, laid out as produced by
                `HCQT.transform_audio`

            data['dphase'] : np.ndarray
                Phase differential, obtained by passing the HCQT
                phase through `self.phase_diff`
        '''
        features = super(HCQTPhaseDiff, self).transform_audio(y)
        raw_phase = features.pop('phase')
        features['dphase'] = self.phase_diff(raw_phase)
        return features

0 comments on commit 0b3529b

Please sign in to comment.