Skip to content

Commit

Permalink
Merge pull request #38 from bmcfee/conv-shapes
Browse files Browse the repository at this point in the history
Conv shapes
  • Loading branch information
bmcfee committed Mar 5, 2017
2 parents 0de182c + 32b5d0c commit 0537b7a
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 107 deletions.
104 changes: 81 additions & 23 deletions pumpp/feature/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import librosa

from ..base import Scope
from ..exceptions import ParameterError


class FeatureExtractor(Scope):
Expand All @@ -21,13 +22,48 @@ class FeatureExtractor(Scope):
hop_length : int > 0
The hop length between analysis frames
conv : {'tf', 'th', None}
convolution dimension ordering:
- 'tf' for tensorflow-style 2D convolution
- 'th' for theano-style 2D convolution
- None for 1D or non-convolutional representations
'''
def __init__(self, name, sr, hop_length):
def __init__(self, name, sr, hop_length, conv=None):
    # Initialise the parent class with the name before validating
    # anything else, as the original ordering does.
    super(FeatureExtractor, self).__init__(name)

    # Only the two 2D-convolution layouts, or None, are accepted.
    allowed = ('tf', 'th', None)
    if conv not in allowed:
        message = 'conv="{}", must be one of ("tf", "th", None)'.format(conv)
        raise ParameterError(message)

    self.sr, self.hop_length, self.conv = sr, hop_length, conv

def register(self, key, dimension, dtype):
    '''Register a field whose shape follows the `conv` setting.

    Parameters
    ----------
    key : str
        Name of the field, passed through to the parent `register`.

    dimension : int
        Size of the feature axis.

    dtype : type
        Data type, passed through to the parent `register`.

    Notes
    -----
    The shape handed to the parent class is:

    - `[None, dimension]` when `conv` is neither 'tf' nor 'th'
    - `[None, dimension, 1]` when `conv == 'tf'`
    - `[1, None, dimension]` when `conv == 'th'`
    '''
    if self.conv == 'tf':
        # tensorflow-style: singleton axis goes last
        field_shape = [None, dimension, 1]
    elif self.conv == 'th':
        # theano-style: singleton axis goes first
        field_shape = [1, None, dimension]
    else:
        field_shape = [None, dimension]

    super(FeatureExtractor, self).register(key, field_shape, dtype)

@property
def idx(self):
    '''Index expression that reshapes a 2D array to match `conv`.

    Returns
    -------
    idx : Ellipsis, tuple, or None
        - `Ellipsis` when `conv` is None (array is left as-is)
        - `(:, :, newaxis)` when `conv == 'tf'`
        - `(newaxis, :, :)` when `conv == 'th'`
        - `None` for any other value of `conv`
    '''
    layout = self.conv
    if layout is None:
        return Ellipsis

    everything = slice(None)
    if layout == 'tf':
        return (everything, everything, np.newaxis)
    if layout == 'th':
        return (np.newaxis, everything, everything)

    # Unrecognised layout: fall through exactly as the chained
    # if/elif did, yielding None.
    return None

def transform(self, y, sr):
'''Transform an audio signal
Expand Down Expand Up @@ -57,30 +93,52 @@ def transform(self, y, sr):
# Base-class stub: always raises NotImplementedError.
# The extractors shown in this commit (CQT, STFT, Mel, Tempogram) each
# define their own transform_audio that returns a dict of arrays.
# NOTE(review): `y` is presumably the audio signal -- confirm against
# the `transform` method, whose body is not fully visible here.
def transform_audio(self, y):
raise NotImplementedError

def phase_diff(self, phase):
'''Compute the phase differential along a given axis
def phase_diff(phase, axis=0):
'''Compute the phase differential along a given axis
Parameters
----------
phase : np.ndarray
Input phase (in radians)
Parameters
----------
phase : np.ndarray
Input phase (in radians)
Returns
-------
dphase : np.ndarray like `phase`
The phase differential.
'''

axis : int
The axis along which to differentiate
if self.conv is None:
axis = 0
elif self.conv == 'tf':
axis = 0
elif self.conv == 'th':
axis = 1

# Compute the phase differential
dphase = np.empty(phase.shape, dtype=phase.dtype)
zero_idx = [slice(None)] * phase.ndim
zero_idx[axis] = slice(1)
else_idx = [slice(None)] * phase.ndim
else_idx[axis] = slice(1, None)
dphase[zero_idx] = phase[zero_idx]
dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
return dphase

def layers(self):
'''Construct Keras input layers for the given transformer
Returns
-------
dphase : np.ndarray like `phase`
The phase differential.
'''
Returns
-------
layers : {field: keras.layers.Input}
A dictionary of keras input layers, keyed by the corresponding
field keys.
'''
from keras.layers import Input

L = dict()
for key in self.fields:
L[key] = Input(name=key,
shape=self.fields[key].shape,
dtype=self.fields[key].dtype)

# Compute the phase differential
dphase = np.empty(phase.shape, dtype=phase.dtype)
zero_idx = [slice(None)] * phase.ndim
zero_idx[axis] = slice(1)
else_idx = [slice(None)] * phase.ndim
else_idx[axis] = slice(1, None)
dphase[zero_idx] = phase[zero_idx]
dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
return dphase
return L
21 changes: 12 additions & 9 deletions pumpp/feature/cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from librosa import cqt, magphase, note_to_hz

from .base import FeatureExtractor, phase_diff
from .base import FeatureExtractor

__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']

Expand Down Expand Up @@ -33,8 +33,8 @@ class CQT(FeatureExtractor):
The minimum frequency of the CQT
'''
def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
fmin=None):
super(CQT, self).__init__(name, sr, hop_length)
fmin=None, conv=None):
super(CQT, self).__init__(name, sr, hop_length, conv=conv)

if fmin is None:
fmin = note_to_hz('C1')
Expand All @@ -44,8 +44,8 @@ def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
self.fmin = fmin

n_bins = n_octaves * 12 * over_sample
self.register('mag', [None, n_bins], np.float32)
self.register('phase', [None, n_bins], np.float32)
self.register('mag', n_bins, np.float32)
self.register('phase', n_bins, np.float32)

def transform_audio(self, y):
'''Compute the CQT
Expand All @@ -72,8 +72,8 @@ def transform_audio(self, y):
self.over_sample * 12),
bins_per_octave=(self.over_sample * 12)))

return {'mag': cqtm.T.astype(np.float32),
'phase': np.angle(phase).T.astype(np.float32)}
return {'mag': cqtm.T.astype(np.float32)[self.idx],
'phase': np.angle(phase).T.astype(np.float32)[self.idx]}


class CQTMag(CQT):
Expand Down Expand Up @@ -117,7 +117,10 @@ class CQTPhaseDiff(CQT):
def __init__(self, *args, **kwargs):
super(CQTPhaseDiff, self).__init__(*args, **kwargs)
phase_field = self.pop('phase')
self.register('dphase', phase_field.shape, phase_field.dtype)

self.register('dphase',
self.n_octaves * 12 * self.over_sample,
phase_field.dtype)

def transform_audio(self, y):
'''Compute the CQT with unwrapped phase
Expand All @@ -137,5 +140,5 @@ def transform_audio(self, y):
Unwrapped phase differential
'''
data = super(CQTPhaseDiff, self).transform_audio(y)
data['dphase'] = phase_diff(data.pop('phase'), axis=0)
data['dphase'] = self.phase_diff(data.pop('phase'))
return data
17 changes: 9 additions & 8 deletions pumpp/feature/fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import librosa

from .base import FeatureExtractor, phase_diff
from .base import FeatureExtractor

__all__ = ['STFT', 'STFTMag', 'STFTPhaseDiff']

Expand Down Expand Up @@ -32,13 +32,13 @@ class STFT(FeatureExtractor):
STFTMag
STFTPhaseDiff
'''
def __init__(self, name, sr, hop_length, n_fft):
super(STFT, self).__init__(name, sr, hop_length)
def __init__(self, name, sr, hop_length, n_fft, conv=None):
super(STFT, self).__init__(name, sr, hop_length, conv=conv)

self.n_fft = n_fft

self.register('mag', [None, 1 + n_fft // 2], np.float32)
self.register('phase', [None, 1 + n_fft // 2], np.float32)
self.register('mag', 1 + n_fft // 2, np.float32)
self.register('phase', 1 + n_fft // 2, np.float32)

def transform_audio(self, y):
'''Compute the STFT magnitude and phase.
Expand All @@ -61,7 +61,8 @@ def transform_audio(self, y):
hop_length=self.hop_length,
n_fft=self.n_fft,
dtype=np.float32))
return {'mag': mag.T, 'phase': np.angle(phase.T)}
return {'mag': mag.T[self.idx],
'phase': np.angle(phase.T)[self.idx]}


class STFTPhaseDiff(STFT):
Expand All @@ -74,7 +75,7 @@ class STFTPhaseDiff(STFT):
def __init__(self, *args, **kwargs):
super(STFTPhaseDiff, self).__init__(*args, **kwargs)
phase_field = self.pop('phase')
self.register('dphase', phase_field.shape, phase_field.dtype)
self.register('dphase', 1 + self.n_fft // 2, phase_field.dtype)

def transform_audio(self, y):
'''Compute the STFT with phase differentials.
Expand All @@ -94,7 +95,7 @@ def transform_audio(self, y):
The unwrapped phase differential
'''
data = super(STFTPhaseDiff, self).transform_audio(y)
data['dphase'] = phase_diff(data.pop('phase'), axis=0)
data['dphase'] = self.phase_diff(data.pop('phase'))
return data


Expand Down
9 changes: 5 additions & 4 deletions pumpp/feature/mel.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@ class Mel(FeatureExtractor):
The maximum frequency bin.
Defaults to `0.5 * sr`
'''
def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):
super(Mel, self).__init__(name, sr, hop_length)
def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None,
conv=None):
super(Mel, self).__init__(name, sr, hop_length, conv=conv)

self.n_fft = n_fft
self.n_mels = n_mels
self.fmax = fmax

self.register('mag', [None, n_mels], np.float32)
self.register('mag', n_mels, np.float32)

def transform_audio(self, y):
'''Compute the Mel spectrogram
Expand All @@ -62,4 +63,4 @@ def transform_audio(self, y):
n_mels=self.n_mels,
fmax=self.fmax)).astype(np.float32)

return {'mag': mel.T}
return {'mag': mel.T[self.idx]}
17 changes: 9 additions & 8 deletions pumpp/feature/rhythm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ class Tempogram(FeatureExtractor):
win_length : int > 0
The length of the analysis window (in frames)
'''
def __init__(self, name, sr, hop_length, win_length):
super(Tempogram, self).__init__(name, sr, hop_length)
def __init__(self, name, sr, hop_length, win_length, conv=None):
super(Tempogram, self).__init__(name, sr, hop_length, conv=conv)

self.win_length = win_length

self.register('tempogram', [None, win_length], np.float32)
self.register('tempogram', win_length, np.float32)

def transform_audio(self, y):
'''Compute the tempogram
Expand All @@ -52,7 +52,7 @@ def transform_audio(self, y):
hop_length=self.hop_length,
win_length=self.win_length).astype(np.float32)

return {'tempogram': tgram.T}
return {'tempogram': tgram.T[self.idx]}


class TempoScale(Tempogram):
Expand All @@ -77,12 +77,13 @@ class TempoScale(Tempogram):
n_fmt : int > 0
Number of scale coefficients to retain
'''
def __init__(self, name, sr, hop_length, win_length, n_fmt=128):
super(TempoScale, self).__init__(name, sr, hop_length, win_length)
def __init__(self, name, sr, hop_length, win_length, n_fmt=128, conv=None):
super(TempoScale, self).__init__(name, sr, hop_length, win_length,
conv=conv)

self.n_fmt = n_fmt
self.pop('tempogram')
self.register('temposcale', [None, 1 + n_fmt // 2], np.float32)
self.register('temposcale', 1 + n_fmt // 2, np.float32)

def transform_audio(self, y):
'''Apply the scale transform to the tempogram
Expand All @@ -101,5 +102,5 @@ def transform_audio(self, y):
data = super(TempoScale, self).transform_audio(y)
data['temposcale'] = np.abs(fmt(data.pop('tempogram'),
axis=1,
n_fmt=self.n_fmt)).astype(np.float32)
n_fmt=self.n_fmt)).astype(np.float32)[self.idx]
return data
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
'mir_eval>=0.4'],
extras_require={
'docs': ['numpydoc'],
'tests': ['pytest', 'pytest-cov']
'tests': ['pytest', 'pytest-cov', 'keras', 'tensorflow'],
'keras': ['keras'],
}
)

0 comments on commit 0537b7a

Please sign in to comment.