Skip to content

Commit

Permalink
Merge pull request #16 from bmcfee/documentation
Browse files Browse the repository at this point in the history
Documentation
  • Loading branch information
bmcfee committed Sep 13, 2016
2 parents c5cfedd + 2e153ca commit 66a1f62
Show file tree
Hide file tree
Showing 11 changed files with 716 additions and 85 deletions.
32 changes: 31 additions & 1 deletion pumpp/feature/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,19 @@


class FeatureExtractor(Scope):
'''The base feature extractor class.
Attributes
----------
name : str
The name for this feature extractor
sr : number > 0
The sampling rate of audio for analysis
hop_length : int > 0
The hop length between analysis frames
'''
def __init__(self, name, sr, hop_length):

super(FeatureExtractor, self).__init__(name)
Expand All @@ -18,7 +30,25 @@ def __init__(self, name, sr, hop_length):
self.hop_length = hop_length

def transform(self, y, sr):
'''Transform an audio signal
Parameters
----------
y : np.ndarray
The audio signal
sr : number > 0
The native sampling rate of y
Returns
-------
dict
Data dictionary containing features extracted from y
See Also
--------
transform_audio
'''
if sr != self.sr:
y = librosa.resample(y, sr, self.sr)

Expand All @@ -30,7 +60,7 @@ def transform_audio(self, y):

def phase_diff(phase, axis=0):
'''Compute the phase differential along a given axis
Parameters
----------
phase : np.ndarray
Expand Down
112 changes: 93 additions & 19 deletions pumpp/feature/cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,141 @@
'''CQT features'''

import numpy as np
import librosa
from librosa import cqt, magphase, note_to_hz

from .base import FeatureExtractor, phase_diff

__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']


class CQT(FeatureExtractor):
    '''Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.
        If `None`, the frequency of the note `C1` is used.
    '''
    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
                 fmin=None):
        super(CQT, self).__init__(name, sr, hop_length)

        if fmin is None:
            fmin = note_to_hz('C1')

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin

        # 12 semitones per octave, `over_sample` bins per semitone
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', [None, n_bins], np.float32)
        self.register('phase', [None, n_bins], np.float32)

    def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        # The local is named `cqtm` so it does not shadow the imported
        # `cqt` function.
        # NOTE(review): `real=False` presumably requests the complex-valued
        # CQT so that `magphase` can split it; confirm against the
        # installed librosa version.
        cqtm, phase = magphase(cqt(y=y,
                                   sr=self.sr,
                                   hop_length=self.hop_length,
                                   fmin=self.fmin,
                                   n_bins=(self.n_octaves *
                                           self.over_sample * 12),
                                   bins_per_octave=(self.over_sample * 12),
                                   real=False))

        # Transpose so that the first axis indexes frames
        return {'mag': cqtm.T.astype(np.float32),
                'phase': np.angle(phase).T.astype(np.float32)}


class CQTMag(CQT):
    '''Magnitude CQT

    The `phase` field registered by `CQT` is removed, so only `mag`
    remains.

    See Also
    --------
    CQT
    '''

    def __init__(self, *args, **kwargs):
        super(CQTMag, self).__init__(*args, **kwargs)
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute CQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                The CQT magnitude
        '''
        data = super(CQTMag, self).transform_audio(y)
        # Discard the phase computed by the parent class
        data.pop('phase')
        return data


class CQTPhaseDiff(CQT):
    '''CQT with unwrapped phase differentials

    The `phase` field registered by `CQT` is replaced by a `dphase` field
    with the same shape and dtype.

    See Also
    --------
    CQT
    '''
    def __init__(self, *args, **kwargs):
        super(CQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered `phase` field for `dphase`
        phase_field = self.pop('phase')
        self.register('dphase', phase_field.shape, phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the CQT magnitude and unwrapped phase differential

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                CQT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, n_bins)
                Unwrapped phase differential
        '''
        data = super(CQTPhaseDiff, self).transform_audio(y)
        # Replace the raw phase with its differential along axis 0
        data['dphase'] = phase_diff(data.pop('phase'), axis=0)
        return data
83 changes: 77 additions & 6 deletions pumpp/feature/fft.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
"""STFT feature extractors"""

import numpy as np
import librosa
Expand All @@ -9,9 +10,29 @@


class STFT(FeatureExtractor):
'''Short-time Fourier Transform (STFT) with both magnitude
and phase.
def __init__(self, name, sr, hop_length, n_fft):
Attributes
----------
name : str
The name of this transformer
sr : number > 0
The sampling rate of audio
hop_length : int > 0
The hop length of STFT frames
n_fft : int > 0
The number of FFT bins per frame
See Also
--------
STFTMag
STFTPhaseDiff
'''
def __init__(self, name, sr, hop_length, n_fft):
super(STFT, self).__init__(name, sr, hop_length)

self.n_fft = n_fft
Expand All @@ -20,7 +41,22 @@ def __init__(self, name, sr, hop_length, n_fft):
self.register('phase', [None, 1 + n_fft // 2], np.float32)

def transform_audio(self, y):

'''Compute the STFT magnitude and phase.
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
STFT magnitude
data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
STFT phase
'''
mag, phase = librosa.magphase(librosa.stft(y,
hop_length=self.hop_length,
n_fft=self.n_fft,
Expand All @@ -29,29 +65,64 @@ def transform_audio(self, y):


class STFTPhaseDiff(STFT):
    '''STFT with phase differentials

    The `phase` field registered by `STFT` is replaced by a `dphase` field
    with the same shape and dtype.

    See Also
    --------
    STFT
    '''
    def __init__(self, *args, **kwargs):
        super(STFTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered `phase` field for `dphase`
        phase_field = self.pop('phase')
        self.register('dphase', phase_field.shape, phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the STFT with phase differentials.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The STFT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The unwrapped phase differential
        '''
        data = super(STFTPhaseDiff, self).transform_audio(y)
        # Replace the raw phase with its differential along axis 0
        data['dphase'] = phase_diff(data.pop('phase'), axis=0)
        return data


class STFTMag(STFT):
'''STFT with only magnitude.
See Also
--------
STFT
'''
def __init__(self, *args, **kwargs):

super(STFTMag, self).__init__(*args, **kwargs)
self.pop('phase')

def transform_audio(self, y):

'''Compute the STFT
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
The STFT magnitude
'''
data = super(STFTMag, self).transform_audio(y)
data.pop('phase')

Expand Down
27 changes: 20 additions & 7 deletions pumpp/feature/mel.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

#!/usr/bin/env python
"""Mel spectrogram"""

import numpy as np
import librosa
from librosa.feature import melspectrogram

from .base import FeatureExtractor

Expand Down Expand Up @@ -34,7 +34,6 @@ class Mel(FeatureExtractor):
Defaults to `0.5 * sr`
'''
def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):

super(Mel, self).__init__(name, sr, hop_length)

self.n_fft = n_fft
Expand All @@ -44,9 +43,23 @@ def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):
self.register('mag', [None, n_mels], np.float32)

def transform_audio(self, y):
mel = np.sqrt(librosa.feature.melspectrogram(y=y, sr=self.sr, n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels,
fmax=self.fmax)).astype(np.float32)
'''Compute the Mel spectrogram
Parameters
----------
y : np.ndarray
The audio buffer
Returns
-------
data : dict
data['mag'] : np.ndarray, shape=(n_frames, n_mels)
The Mel spectrogram
'''
mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels,
fmax=self.fmax)).astype(np.float32)

return {'mag': mel.T}

0 comments on commit 66a1f62

Please sign in to comment.