Skip to content

Commit

Permalink
Merge pull request #61 from bmcfee/feature-alignment
Browse files Browse the repository at this point in the history
normalized feature durations. fixes #60
  • Loading branch information
bmcfee committed Apr 5, 2017
2 parents 16807e4 + f99d696 commit 6c2284b
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 26 deletions.
25 changes: 22 additions & 3 deletions pumpp/feature/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
'''Feature extraction base class'''

import numpy as np
import librosa
from librosa import resample, time_to_frames

from ..base import Scope
from ..exceptions import ParameterError
Expand Down Expand Up @@ -38,7 +38,8 @@ def __init__(self, name, sr, hop_length, conv=None):

if conv not in ('tf', 'th', 'channels_last', 'channels_first', None):
raise ParameterError('conv="{}", must be one of '
'("channels_last", "tf", "channels_first", "th", None)'.format(conv))
'("channels_last", "tf", '
'"channels_first", "th", None)'.format(conv))

self.sr = sr
self.hop_length = hop_length
Expand Down Expand Up @@ -88,7 +89,7 @@ def transform(self, y, sr):
transform_audio
'''
if sr != self.sr:
y = librosa.resample(y, sr, self.sr)
y = resample(y, sr, self.sr)

return self.merge([self.transform_audio(y)])

Expand Down Expand Up @@ -144,3 +145,21 @@ def layers(self):
dtype=self.fields[key].dtype)

return L

def n_frames(self, duration):
    '''Convert a duration in seconds to a frame count.

    The conversion is delegated to `librosa.time_to_frames`, using this
    extractor's own `sr` and `hop_length` attributes.

    Parameters
    ----------
    duration : number >= 0
        The duration, in seconds

    Returns
    -------
    n_frames : int >= 0
        The number of frames at this extractor's sampling rate and
        hop length
    '''
    frame_count = time_to_frames(duration,
                                 sr=self.sr,
                                 hop_length=self.hop_length)

    # Cast to a built-in int before returning, matching the documented
    # return type regardless of what time_to_frames hands back.
    return int(frame_count)
38 changes: 23 additions & 15 deletions pumpp/feature/cqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
'''CQT features'''

import numpy as np
from librosa import cqt, magphase, note_to_hz, amplitude_to_db
from librosa import cqt, magphase, note_to_hz
from librosa import amplitude_to_db, get_duration
from librosa.util import fix_length

from .base import FeatureExtractor
from ..exceptions import ParameterError
Expand Down Expand Up @@ -73,13 +75,16 @@ def transform_audio(self, y):
data['phase']: np.ndarray, shape = mag.shape
The CQT phase
'''
cqtm, phase = magphase(cqt(y=y,
sr=self.sr,
hop_length=self.hop_length,
fmin=self.fmin,
n_bins=(self.n_octaves *
self.over_sample * 12),
bins_per_octave=(self.over_sample * 12)))
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
fmin=self.fmin,
n_bins=(self.n_octaves * self.over_sample * 12),
bins_per_octave=(self.over_sample * 12))

C = fix_length(C, n_frames)

cqtm, phase = magphase(C)
if self.log:
cqtm = amplitude_to_db(cqtm, ref=np.max)

Expand Down Expand Up @@ -242,14 +247,17 @@ def transform_audio(self, y):
'''
cqtm, phase = [], []

n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

for h in self.harmonics:
C, P = magphase(cqt(y=y,
sr=self.sr,
hop_length=self.hop_length,
fmin=self.fmin * h,
n_bins=(self.n_octaves *
self.over_sample * 12),
bins_per_octave=(self.over_sample * 12)))
C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
fmin=self.fmin * h,
n_bins=(self.n_octaves * self.over_sample * 12),
bins_per_octave=(self.over_sample * 12))

C = fix_length(C, n_frames)

C, P = magphase(C)
if self.log:
C = amplitude_to_db(C, ref=np.max)
cqtm.append(C)
Expand Down
18 changes: 12 additions & 6 deletions pumpp/feature/fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
"""STFT feature extractors"""

import numpy as np
import librosa
from librosa import stft, magphase
from librosa import amplitude_to_db, get_duration
from librosa.util import fix_length

from .base import FeatureExtractor

Expand Down Expand Up @@ -63,12 +65,16 @@ def transform_audio(self, y):
data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
STFT phase
'''
mag, phase = librosa.magphase(librosa.stft(y,
hop_length=self.hop_length,
n_fft=self.n_fft,
dtype=np.float32))
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

D = stft(y, hop_length=self.hop_length,
n_fft=self.n_fft, dtype=np.float32)

D = fix_length(D, n_frames)

mag, phase = magphase(D)
if self.log:
mag = librosa.amplitude_to_db(mag, ref=np.max)
mag = amplitude_to_db(mag, ref=np.max)

return {'mag': mag.T[self.idx],
'phase': np.angle(phase.T)[self.idx]}
Expand Down
9 changes: 8 additions & 1 deletion pumpp/feature/mel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import numpy as np
from librosa.feature import melspectrogram
from librosa import amplitude_to_db, get_duration
from librosa.util import fix_length

from .base import FeatureExtractor

Expand Down Expand Up @@ -63,12 +65,17 @@ def transform_audio(self, y):
data['mag'] : np.ndarray, shape=(n_frames, n_mels)
The Mel spectrogram
'''
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels,
fmax=self.fmax)).astype(np.float32)

mel = fix_length(mel, n_frames)

if self.log:
mel = librosa.amplitude_to_db(mel, ref=np.max)
mel = amplitude_to_db(mel, ref=np.max)

return {'mag': mel.T[self.idx]}
5 changes: 5 additions & 0 deletions pumpp/feature/rhythm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
from librosa import fmt
from librosa.feature import tempogram
from librosa import get_duration
from librosa.util import fix_length

from .base import FeatureExtractor

Expand Down Expand Up @@ -48,10 +50,13 @@ def transform_audio(self, y):
data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
The tempogram
'''
n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

tgram = tempogram(y=y, sr=self.sr,
hop_length=self.hop_length,
win_length=self.win_length).astype(np.float32)

tgram = fix_length(tgram, n_frames)
return {'tempogram': tgram.T[self.idx]}


Expand Down
1 change: 0 additions & 1 deletion pumpp/task/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ def encode_events(self, duration, events, values, dtype=np.bool):
target : ndarray, shape=(n_frames, n_values)
'''

# FIXME: support sparse encoding
frames = time_to_frames(events, sr=self.sr,
hop_length=self.hop_length)

Expand Down

0 comments on commit 6c2284b

Please sign in to comment.