Merge pull request #16 from bmcfee/documentation

Documentation
bmcfee · Sep 13, 2016 · 66a1f62 · 66a1f62
2 parents c5cfedd + 2e153ca
commit 66a1f62
Show file tree

Hide file tree

Showing 11 changed files with 716 additions and 85 deletions.
diff --git a/pumpp/feature/base.py b/pumpp/feature/base.py
@@ -9,7 +9,19 @@
 
 
 class FeatureExtractor(Scope):
+    '''The base feature extractor class.
 
+    Attributes
+    ----------
+    name : str
+        The name for this feature extractor
+
+    sr : number > 0
+        The sampling rate of audio for analysis
+
+    hop_length : int > 0
+        The hop length between analysis frames
+    '''
     def __init__(self, name, sr, hop_length):
 
         super(FeatureExtractor, self).__init__(name)
@@ -18,7 +30,25 @@ def __init__(self, name, sr, hop_length):
         self.hop_length = hop_length
 
     def transform(self, y, sr):
+        '''Transform an audio signal
 
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio signal
+
+        sr : number > 0
+            The native sampling rate of y
+
+        Returns
+        -------
+        dict
+            Data dictionary containing features extracted from y
+
+        See Also
+        --------
+        transform_audio
+        '''
         if sr != self.sr:
             y = librosa.resample(y, sr, self.sr)
 
@@ -30,7 +60,7 @@ def transform_audio(self, y):
 
 def phase_diff(phase, axis=0):
     '''Compute the phase differential along a given axis
-    
+
     Parameters
     ----------
     phase : np.ndarray

diff --git a/pumpp/feature/cqt.py b/pumpp/feature/cqt.py
@@ -2,67 +2,141 @@
 '''CQT features'''
 
 import numpy as np
-import librosa
+from librosa import cqt, magphase, note_to_hz
 
 from .base import FeatureExtractor, phase_diff
 
 __all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff']
 
 
 class CQT(FeatureExtractor):
+    '''Constant-Q transform
 
-    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3, fmin=None):
+    Attributes
+    ----------
+    name : str
+        The name for this feature extractor
 
+    sr : number > 0
+        The sampling rate of audio
+
+    hop_length : int > 0
+        The number of samples between CQT frames
+
+    n_octaves : int > 0
+        The number of octaves in the CQT
+
+    over_sample : int > 0
+        The amount of frequency oversampling (bins per semitone)
+
+    fmin : float > 0
+        The minimum frequency of the CQT
+    '''
+    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
+                 fmin=None):
         super(CQT, self).__init__(name, sr, hop_length)
 
         if fmin is None:
-            fmin = librosa.note_to_hz('C1')
+            fmin = note_to_hz('C1')
 
         self.n_octaves = n_octaves
         self.over_sample = over_sample
         self.fmin = fmin
 
-        self.register('mag', [None, n_octaves * 12 * over_sample], np.float32)
-        self.register('phase', [None, n_octaves * 12 * over_sample], np.float32)
+        n_bins = n_octaves * 12 * over_sample
+        self.register('mag', [None, n_bins], np.float32)
+        self.register('phase', [None, n_bins], np.float32)
 
     def transform_audio(self, y):
-
-        cqt, phase = librosa.magphase(librosa.cqt(y=y,
-                                                  sr=self.sr,
-                                                  hop_length=self.hop_length,
-                                                  fmin=self.fmin,
-                                                  n_bins=self.n_octaves *
-                                                         self.over_sample * 12,
-                                                  bins_per_octave=self.over_sample * 12,
-                                                  real=False))
-
-        return {'mag': cqt.T.astype(np.float32),
+        '''Compute the CQT
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
+                The CQT magnitude
+
+            data['phase'] : np.ndarray, shape = mag.shape
+                The CQT phase
+        '''
+        cqtm, phase = magphase(cqt(y=y,
+                                   sr=self.sr,
+                                   hop_length=self.hop_length,
+                                   fmin=self.fmin,
+                                   n_bins=(self.n_octaves *
+                                           self.over_sample * 12),
+                                   bins_per_octave=(self.over_sample * 12),
+                                   real=False))
+
+        return {'mag': cqtm.T.astype(np.float32),
                 'phase': np.angle(phase).T.astype(np.float32)}
 
 
 class CQTMag(CQT):
+    '''Magnitude CQT
 
-    def __init__(self, *args, **kwargs):
+    See Also
+    --------
+    CQT
+    '''
 
+    def __init__(self, *args, **kwargs):
         super(CQTMag, self).__init__(*args, **kwargs)
         self.pop('phase')
 
     def transform_audio(self, y):
-
+        '''Compute CQT magnitude.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
+                The CQT magnitude
+        '''
         data = super(CQTMag, self).transform_audio(y)
         data.pop('phase')
         return data
 
 
 class CQTPhaseDiff(CQT):
+    '''CQT with unwrapped phase differentials
 
+    See Also
+    --------
+    CQT
+    '''
     def __init__(self, *args, **kwargs):
-
         super(CQTPhaseDiff, self).__init__(*args, **kwargs)
         phase_field = self.pop('phase')
         self.register('dphase', phase_field.shape, phase_field.dtype)
 
     def transform_audio(self, y):
+        '''Compute the CQT with unwrapped phase differentials
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
+                CQT magnitude
+
+            data['dphase'] : np.ndarray, shape=(n_frames, n_bins)
+                Unwrapped phase differential
+        '''
         data = super(CQTPhaseDiff, self).transform_audio(y)
         data['dphase'] = phase_diff(data.pop('phase'), axis=0)
         return data
diff --git a/pumpp/feature/fft.py b/pumpp/feature/fft.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+"""STFT feature extractors"""
 
 import numpy as np
 import librosa
@@ -9,9 +10,29 @@
 
 
 class STFT(FeatureExtractor):
+    '''Short-time Fourier Transform (STFT) with both magnitude
+    and phase.
 
-    def __init__(self, name, sr, hop_length, n_fft):
+    Attributes
+    ----------
+    name : str
+        The name of this transformer
+
+    sr : number > 0
+        The sampling rate of audio
+
+    hop_length : int > 0
+        The hop length of STFT frames
 
+    n_fft : int > 0
+        The FFT size; each frame has 1 + n_fft // 2 frequency bins
+
+    See Also
+    --------
+    STFTMag
+    STFTPhaseDiff
+    '''
+    def __init__(self, name, sr, hop_length, n_fft):
         super(STFT, self).__init__(name, sr, hop_length)
 
         self.n_fft = n_fft
@@ -20,7 +41,22 @@ def __init__(self, name, sr, hop_length, n_fft):
         self.register('phase', [None, 1 + n_fft // 2], np.float32)
 
     def transform_audio(self, y):
-
+        '''Compute the STFT magnitude and phase.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
+                STFT magnitude
+
+            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
+                STFT phase
+        '''
         mag, phase = librosa.magphase(librosa.stft(y,
                                                    hop_length=self.hop_length,
                                                    n_fft=self.n_fft,
@@ -29,29 +65,64 @@ def transform_audio(self, y):
 
 
 class STFTPhaseDiff(STFT):
+    '''STFT with phase differentials
 
+    See Also
+    --------
+    STFT
+    '''
     def __init__(self, *args, **kwargs):
-
         super(STFTPhaseDiff, self).__init__(*args, **kwargs)
         phase_field = self.pop('phase')
         self.register('dphase', phase_field.shape, phase_field.dtype)
 
     def transform_audio(self, y):
-
+        '''Compute the STFT with phase differentials.
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
+                The STFT magnitude
+
+            data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
+                The unwrapped phase differential
+        '''
         data = super(STFTPhaseDiff, self).transform_audio(y)
         data['dphase'] = phase_diff(data.pop('phase'), axis=0)
         return data
 
 
 class STFTMag(STFT):
+    '''STFT with only magnitude.
 
+    See Also
+    --------
+    STFT
+    '''
     def __init__(self, *args, **kwargs):
-
         super(STFTMag, self).__init__(*args, **kwargs)
         self.pop('phase')
 
     def transform_audio(self, y):
-
+        '''Compute the STFT magnitude
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
+                The STFT magnitude
+        '''
         data = super(STFTMag, self).transform_audio(y)
         data.pop('phase')
 

diff --git a/pumpp/feature/mel.py b/pumpp/feature/mel.py
@@ -1,8 +1,8 @@
-
 #!/usr/bin/env python
+"""Mel spectrogram"""
 
 import numpy as np
-import librosa
+from librosa.feature import melspectrogram
 
 from .base import FeatureExtractor
 
@@ -34,7 +34,6 @@ class Mel(FeatureExtractor):
         Defaults to `0.5 * sr`
     '''
     def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):
-
         super(Mel, self).__init__(name, sr, hop_length)
 
         self.n_fft = n_fft
@@ -44,9 +43,23 @@ def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None):
         self.register('mag', [None, n_mels], np.float32)
 
     def transform_audio(self, y):
-        mel = np.sqrt(librosa.feature.melspectrogram(y=y, sr=self.sr, n_fft=self.n_fft,
-                                                     hop_length=self.hop_length,
-                                                     n_mels=self.n_mels,
-                                                     fmax=self.fmax)).astype(np.float32)
+        '''Compute the Mel spectrogram
+
+        Parameters
+        ----------
+        y : np.ndarray
+            The audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['mag'] : np.ndarray, shape=(n_frames, n_mels)
+                The square root of the Mel spectrogram, as float32
+        '''
+        mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
+                                     n_fft=self.n_fft,
+                                     hop_length=self.hop_length,
+                                     n_mels=self.n_mels,
+                                     fmax=self.fmax)).astype(np.float32)
 
         return {'mag': mel.T}