Skip to content

Commit

Permalink
Merge pull request #91 from bmcfee/confidence-rated-prediction
Browse files (browse the repository at this point in the history)
Confidence-rated prediction
  • Loading branch information
bmcfee committed Oct 12, 2017
2 parents 460d076 + beb77dc commit 0d6ef78
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 11 deletions.
17 changes: 13 additions & 4 deletions pumpp/task/beat.py
Expand Up @@ -4,6 +4,7 @@

import numpy as np

from librosa import time_to_frames
import jams
from mir_eval.util import boundaries_to_intervals, adjust_intervals
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
Expand Down Expand Up @@ -95,7 +96,11 @@ def inverse(self, encoded, downbeat=None, duration=None):

ann = jams.Annotation(namespace=self.namespace, duration=duration)

beat_times = [t for t, _ in self.decode_events(encoded) if _]
beat_times = np.asarray([t for t, _ in self.decode_events(encoded) if _])
beat_frames = time_to_frames(beat_times,
sr=self.sr,
hop_length=self.hop_length)

if downbeat is not None:
downbeat_times = set([t for t, _ in self.decode_events(downbeat)
if _])
Expand All @@ -106,12 +111,16 @@ def inverse(self, encoded, downbeat=None, duration=None):
pickup_beats = 0

value = - pickup_beats - 1
for beat in beat_times:
if beat in downbeat_times:
for beat_t, beat_f in zip(beat_times, beat_frames):
if beat_t in downbeat_times:
value = 1
else:
value += 1
ann.append(time=beat, duration=0, value=value)
confidence = encoded[beat_f]
ann.append(time=beat_t,
duration=0,
value=value,
confidence=confidence)

return ann

Expand Down
21 changes: 20 additions & 1 deletion pumpp/task/chord.py
Expand Up @@ -9,6 +9,7 @@
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer

from librosa import time_to_frames
import mir_eval
import jams

Expand Down Expand Up @@ -492,12 +493,30 @@ def inverse(self, encoded, duration=None):
duration=duration,
multi=False,
sparse=self.sparse):

# Map start:end to frames
f_start, f_end = time_to_frames([start, end],
sr=self.sr,
hop_length=self.hop_length)

# Reverse the index
if self.sparse:
# Compute the confidence
if encoded.shape[1] == 1:
# This case is for full-confidence prediction (just the index)
confidence = 1.
else:
confidence = np.mean(encoded[f_start:f_end+1, value])

value_dec = self.encoder.inverse_transform(value)
else:
confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

for vd in value_dec:
ann.append(time=start, duration=end-start, value=vd)
ann.append(time=start,
duration=end-start,
value=vd,
confidence=float(confidence))

return ann
29 changes: 23 additions & 6 deletions pumpp/task/tags.py
Expand Up @@ -5,6 +5,8 @@
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

from librosa import time_to_frames

import jams

from .base import BaseTaskTransformer
Expand Down Expand Up @@ -106,10 +108,20 @@ def inverse(self, encoded, duration=None):
ann = jams.Annotation(namespace=self.namespace, duration=duration)
for start, end, value in self.decode_intervals(encoded,
duration=duration):
# Map start:end to frames
f_start, f_end = time_to_frames([start, end],
sr=self.sr,
hop_length=self.hop_length)

confidence = np.mean(encoded[f_start:f_end+1, value])

value_dec = self.encoder.inverse_transform(np.atleast_2d(value))[0]

for vd in value_dec:
ann.append(time=start, duration=end-start, value=vd)
ann.append(time=start,
duration=end-start,
value=vd,
confidence=confidence)

return ann

Expand Down Expand Up @@ -185,9 +197,14 @@ def inverse(self, encoded, duration=None):
ann = jams.Annotation(namespace=self.namespace, duration=duration)

if np.isrealobj(encoded):
encoded = (encoded >= 0.5)

for vd in self.encoder.inverse_transform(np.atleast_2d(encoded))[0]:
ann.append(time=0, duration=duration, value=vd)

detected = (encoded >= 0.5)
else:
detected = encoded

for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]:
vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd)))
ann.append(time=0,
duration=duration,
value=vd,
confidence=encoded[vid])
return ann
28 changes: 28 additions & 0 deletions tests/test_decode.py
Expand Up @@ -101,6 +101,8 @@ def test_decode_tags_dynamic_hard(sr, hop_length, ann_tag):
data = tc.transform_annotation(ann_tag, ann_tag.duration)

inverse = tc.inverse(data['tags'], duration=ann_tag.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_tag.duration)

assert np.allclose(data['tags'], data2['tags'])
Expand All @@ -119,6 +121,8 @@ def test_decode_tags_dynamic_soft(sr, hop_length, ann_tag):
# Soften the data, but preserve the decisions
tags_predict = data['tags'] * 0.51 + 0.1
inverse = tc.inverse(tags_predict, duration=ann_tag.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_tag.duration)

assert np.allclose(data['tags'], data2['tags'])
Expand All @@ -130,6 +134,8 @@ def test_decode_tags_static_hard(ann_tag):

data = tc.transform_annotation(ann_tag, ann_tag.duration)
inverse = tc.inverse(data['tags'], ann_tag.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_tag.duration)

assert np.allclose(data['tags'], data2['tags'])
Expand All @@ -143,6 +149,8 @@ def test_decode_tags_static_soft(ann_tag):
tags_predict = data['tags'] * 0.51 + 0.1

inverse = tc.inverse(tags_predict, ann_tag.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_tag.duration)

assert np.allclose(data['tags'], data2['tags'])
Expand All @@ -154,6 +162,8 @@ def test_decode_beat_hard(sr, hop_length, ann_beat):

data = tc.transform_annotation(ann_beat, ann_beat.duration)
inverse = tc.inverse(data['beat'], duration=ann_beat.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_beat.duration)

assert np.allclose(data['beat'], data2['beat'])
Expand All @@ -167,6 +177,8 @@ def test_decode_beat_soft(sr, hop_length, ann_beat):
beat_pred = data['beat'] * 0.51 + 0.1

inverse = tc.inverse(beat_pred, duration=ann_beat.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_beat.duration)

assert np.allclose(data['beat'], data2['beat'])
Expand All @@ -179,6 +191,8 @@ def test_decode_beat_downbeat_hard(sr, hop_length, ann_beat):
data = tc.transform_annotation(ann_beat, ann_beat.duration)
inverse = tc.inverse(data['beat'], downbeat=data['downbeat'],
duration=ann_beat.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_beat.duration)

assert np.allclose(data['beat'], data2['beat'])
Expand All @@ -193,6 +207,8 @@ def test_decode_beat_downbeat_soft(sr, hop_length, ann_beat):
dbeat_pred = data['downbeat'] * 0.51 + 0.1
inverse = tc.inverse(beat_pred, downbeat=dbeat_pred,
duration=ann_beat.duration)
for obs in inverse:
assert 0. <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_beat.duration)

assert np.allclose(data['beat'], data2['beat'])
Expand Down Expand Up @@ -250,6 +266,8 @@ def test_decode_chordtag_hard_dense(sr, hop_length, ann_chord):
data = tc.transform_annotation(ann_chord, ann_chord.duration)

inverse = tc.inverse(data['chord'], duration=ann_chord.duration)
for obs in inverse:
assert 0 <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_chord.duration)

assert np.allclose(data['chord'], data2['chord'])
Expand All @@ -267,6 +285,10 @@ def test_decode_chordtag_soft_dense(sr, hop_length, ann_chord):

chord_predict = data['chord'] * 0.51 + 0.1
inverse = tc.inverse(chord_predict, duration=ann_chord.duration)

for obs in inverse:
assert 0 <= obs.confidence <= 1.

data2 = tc.transform_annotation(inverse, ann_chord.duration)

assert np.allclose(data['chord'], data2['chord'])
Expand All @@ -283,6 +305,8 @@ def test_decode_chordtag_hard_sparse_sparse(sr, hop_length, ann_chord):
data = tc.transform_annotation(ann_chord, ann_chord.duration)

inverse = tc.inverse(data['chord'], duration=ann_chord.duration)
for obs in inverse:
assert 0 <= obs.confidence <= 1.
data2 = tc.transform_annotation(inverse, ann_chord.duration)

assert np.allclose(data['chord'], data2['chord'])
Expand All @@ -305,6 +329,8 @@ def test_decode_chordtag_hard_dense_sparse(sr, hop_length, ann_chord):

# Invert using the sparse encoder
inverse = tcs.inverse(data['chord'], duration=ann_chord.duration)
for obs in inverse:
assert 0 <= obs.confidence <= 1.
data2 = tcs.transform_annotation(inverse, ann_chord.duration)

dense_positions = np.where(data['chord'])[1]
Expand All @@ -330,6 +356,8 @@ def test_decode_chordtag_soft_dense_sparse(sr, hop_length, ann_chord):
chord_predict = data['chord'] * 0.51 + 0.1
# Invert using the sparse encoder
inverse = tcs.inverse(chord_predict, duration=ann_chord.duration)
for obs in inverse:
assert 0 <= obs.confidence <= 1.
data2 = tcs.transform_annotation(inverse, ann_chord.duration)

dense_positions = np.where(data['chord'])[1]
Expand Down

0 comments on commit 0d6ef78

Please sign in to comment.