Merge pull request #88 from bmcfee/time-coding

added time position coding
bmcfee · Jul 27, 2017 · 5cb46df · 5cb46df
2 parents b84c775 + ebf30c7
commit 5cb46df
Show file tree

Hide file tree

Showing 5 changed files with 99 additions and 6 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -3,11 +3,6 @@ language: python
 # sudo false implies containerized builds
 sudo: false
 
-addons:
-  apt:
-    packages:
-    - ffmpeg
-
 notifications:
     email: false
 

diff --git a/.travis_dependencies.sh b/.travis_dependencies.sh
@@ -34,7 +34,7 @@ if [ ! -d "$src" ]; then
 
         source activate $ENV_NAME
 
-        conda install -c conda-forge librosa
+        conda install -c conda-forge ffmpeg librosa
         pip install python-coveralls pytest-faulthandler
 
         source deactivate

diff --git a/pumpp/feature/__init__.py b/pumpp/feature/__init__.py
@@ -20,10 +20,12 @@
     Mel
     Tempogram
     TempoScale
+    TimePosition
 '''
 
 from .base import *
 from .cqt import *
 from .fft import *
 from .mel import *
 from .rhythm import *
+from .time import *
diff --git a/pumpp/feature/time.py b/pumpp/feature/time.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+'''Time features'''
+
+import numpy as np
+
+from librosa import get_duration
+
+from .base import FeatureExtractor
+
+__all__ = ['TimePosition']
+
+
+class TimePosition(FeatureExtractor):
+    '''TimePosition: encode frame position as features.
+
+    Attributes
+    ----------
+    name : str
+        The name of this feature extractor
+    sr : number > 0
+        The sampling rate of audio
+    hop_length : int > 0
+        The hop length of analysis windows
+    conv : optional
+        Forwarded unchanged to the `FeatureExtractor` constructor
+    '''
+
+    def __init__(self, name, sr, hop_length, conv=None):
+        super(TimePosition, self).__init__(name, sr, hop_length, conv=conv)
+
+        self.register('relative', 2, np.float32)
+        self.register('absolute', 2, np.float32)
+
+    def transform_audio(self, y):
+        '''Compute the time position encoding
+
+        Parameters
+        ----------
+        y : np.ndarray
+            Audio buffer
+
+        Returns
+        -------
+        data : dict
+            data['relative'] : cos and sin of an angle swept linearly
+                from 0 to pi across the frames; float32, 2 channels
+            data['absolute'] : `relative` scaled by sqrt(duration of y)
+            Both arrays are indexed by `self.idx` before being returned.
+        '''
+        duration = get_duration(y=y, sr=self.sr)
+        n_frames = self.n_frames(duration)
+
+        phase = np.pi * np.linspace(0, 1, num=n_frames)
+        relative = np.zeros((n_frames, 2), dtype=np.float32)
+        relative[:, 0] = np.cos(phase)
+        relative[:, 1] = np.sin(phase)
+
+        # Keep float32: a float64 scalar would upcast the product (NumPy >= 2)
+        absolute = relative * np.float32(np.sqrt(duration))
+        return {'relative': relative[self.idx],
+                'absolute': absolute[self.idx]}
diff --git a/tests/test_feature.py b/tests/test_feature.py
@@ -531,3 +531,38 @@ def test_feature_hcqtphasediff(audio, SR, HOP_LENGTH, over_sample, n_octaves,
     for key in ext.fields:
         assert shape_match(output[key].shape[1:], ext.fields[key].shape)
         assert type_match(output[key].dtype, ext.fields[key].dtype)
+
+
+# Time Features
+
+def test_feature_time_fields(SR, HOP_LENGTH, conv):
+    '''TimePosition declares 2-channel float32 absolute/relative fields.'''
+    extractor = pumpp.feature.TimePosition(name='time', sr=SR,
+                                           hop_length=HOP_LENGTH, conv=conv)
+    expected = ['time/absolute', 'time/relative']
+
+    # Exactly these two fields, namespaced by the extractor name
+    assert set(extractor.fields.keys()) == set(expected)
+
+    # Shapes first, then dtypes, in the same order for both fields
+    for key in expected:
+        __check_shape(extractor.fields, key, 2, conv)
+    for key in expected:
+        assert extractor.fields[key].dtype is np.float32
+
+
+def test_feature_time(audio, SR, HOP_LENGTH, conv):
+    '''TimePosition output agrees with its declared field specs.'''
+    extractor = pumpp.feature.TimePosition(name='time', sr=SR,
+                                           hop_length=HOP_LENGTH, conv=conv)
+    result = extractor.transform(**audio)
+    declared = extractor.fields
+
+    # The transform must emit exactly the declared fields
+    assert set(result.keys()) == set(declared.keys())
+
+    # Shapes are compared with the leading output axis dropped
+    for key in declared:
+        observed, spec = result[key], declared[key]
+        assert shape_match(observed.shape[1:], spec.shape)
+        assert type_match(observed.dtype, spec.dtype)