Skip to content

Commit

Permalink
Merge pull request #28 from bmcfee/chord-as-tag
Browse files Browse the repository at this point in the history
Chord tag encoding
  • Loading branch information
bmcfee committed Feb 22, 2017
2 parents ceae0a2 + 4aa8c7e commit bfcbddf
Show file tree
Hide file tree
Showing 4 changed files with 422 additions and 17 deletions.
11 changes: 9 additions & 2 deletions pumpp/task/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def encode_events(self, duration, events, values, dtype=np.bool):

return target

def encode_intervals(self, duration, intervals, values, dtype=np.bool):
def encode_intervals(self, duration, intervals, values, dtype=np.bool,
multi=True):
'''Encode labeled intervals as a time-series matrix.
Parameters
Expand All @@ -181,6 +182,9 @@ def encode_intervals(self, duration, intervals, values, dtype=np.bool):
dtype : np.dtype
The desired output type
multi : bool
If `True`, allow multiple labels per interval.
Returns
-------
target : np.ndarray, shape=(duration * sr / hop_length, m)
Expand All @@ -199,7 +203,10 @@ def encode_intervals(self, duration, intervals, values, dtype=np.bool):
target.fill(fill_value(dtype))

for column, interval in zip(values, frames):
target[interval[0]:interval[1]] += column
if multi:
target[interval[0]:interval[1]] += column
else:
target[interval[0]:interval[1]] = column

return target

Expand Down
227 changes: 226 additions & 1 deletion pumpp/task/chord.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,20 @@
# -*- encoding: utf-8 -*-
'''Chord recognition task transformer'''

import re
from itertools import product

import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

import mir_eval
import jams

from .base import BaseTaskTransformer
from ..exceptions import ParameterError

__all__ = ['ChordTransformer', 'SimpleChordTransformer']
__all__ = ['ChordTransformer', 'SimpleChordTransformer', 'ChordTagTransformer']


def _pad_nochord(target, axis=-1):
Expand Down Expand Up @@ -216,3 +223,221 @@ def transform_annotation(self, ann, duration):

def inverse(self, *args, **kwargs):
    '''Reject any request to invert the encoding.

    All positional and keyword arguments are accepted and ignored.

    Raises
    ------
    NotImplementedError
        Always; no inverse is implemented here.
    '''
    message = 'SimpleChord cannot be inverted'
    raise NotImplementedError(message)


#: A list of normalized pitch class names (sharps only, starting from C)
PITCHES = 'C C# D D# E F F# G G# A A# B'.split()


#: A mapping of chord quality encodings to their names.
#:
#: Each key is a 12-bit integer, written here as a bit string and parsed
#: with base 2.  Two spellings exist for 'min' and for 'maj': one with only
#: the third set, and one with the third plus one additional bit.
QUALITIES = {
    int(bits, 2): quality
    for bits, quality in (
        ('000100000000', 'min'),
        ('000010000000', 'maj'),
        ('000100010000', 'min'),
        ('000010010000', 'maj'),
        ('000100100000', 'dim'),
        ('000010001000', 'aug'),
        ('000100010010', 'min7'),
        ('000010010001', 'maj7'),
        ('000010010010', '7'),
        ('000100100100', 'dim7'),
        ('000100100010', 'hdim7'),
        ('000100010001', 'minmaj7'),
        ('000100010100', 'min6'),
        ('000010010100', 'maj6'),
        ('001000010000', 'sus2'),
        ('000001010000', 'sus4'),
    )
}


class ChordTagTransformer(BaseTaskTransformer):
    '''Chord transformer that uses a tag-space encoding for chord labels.

    Attributes
    ----------
    name : str
        name of the transformer

    vocab : str
        A string of chord quality indicators to include:

            - '3': maj/min
            - '5': '3' + aug/dim
            - '6': '3' + '5' + maj6/min6
            - '7': '3' + '5' + '6' + 7/min7/maj7/dim7/hdim7/minmaj7
            - 's': sus2/sus4

        Note: 5 requires 3, 6 requires 5, 7 requires 6.

        Indicators are matched case-insensitively; the stored attribute
        is always lowercase.

    nochord : str
        String to use for no-chord symbols

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    See Also
    --------
    ChordTransformer
    SimpleChordTransformer
    '''
    def __init__(self, name='chord', vocab='3567s', nochord='N',
                 sr=22050, hop_length=512):

        super(ChordTagTransformer, self).__init__(name=name,
                                                  namespace='chord',
                                                  sr=sr,
                                                  hop_length=hop_length)

        # Lowercase *before* validating, so that e.g. 'S' is accepted just
        # like 's'.  Error messages still quote what the caller passed in.
        normalized = vocab.lower()

        if set(normalized) - set('3567s'):
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        # Each extended family builds on the one before it:
        # 5 requires 3, 6 requires 5, 7 requires 6.
        for family, prerequisite in (('5', '3'), ('6', '5'), ('7', '6')):
            if family in normalized and prerequisite not in normalized:
                raise ParameterError(
                    'Invalid vocabulary string: {}'.format(vocab))

        self.vocab = normalized
        self.nochord = nochord
        labels = self.vocabulary()

        self.encoder = LabelBinarizer()
        self.encoder.fit(labels)
        self._classes = set(self.encoder.classes_)

        # Construct the quality mask for chord encoding.
        # Bits that are not covered by the mask are discarded in `simplify`
        # before the quality lookup, which is how richer chords collapse
        # onto the enabled vocabulary.
        self.mask_ = 0b000000000000
        if '3' in self.vocab:
            self.mask_ |= 0b000110000000
        if '5' in self.vocab:
            self.mask_ |= 0b000110111000
        if '6' in self.vocab:
            self.mask_ |= 0b000110010100
        if '7' in self.vocab:
            self.mask_ |= 0b000110110111
        if 's' in self.vocab:
            self.mask_ |= 0b001001010000

        # The builtin `bool` replaces the `np.bool` alias, which NumPy
        # deprecated and later removed; on older releases the two names
        # refer to the same object.
        self.register('chord', [None, len(self._classes)], bool)

    def empty(self, duration):
        '''Empty chord annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A chord annotation consisting of a single `no-chord` observation.
        '''
        ann = super(ChordTagTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def vocabulary(self):
        '''Build the list of chord labels for the current vocabulary.

        Returns
        -------
        labels : list of str
            The no-chord symbol, followed by one `'PITCH:quality'` label
            for every combination of a pitch class in `PITCHES` and an
            enabled chord quality.
        '''
        qualities = []

        if '3' in self.vocab or '5' in self.vocab:
            qualities.extend(['min', 'maj'])

        if '5' in self.vocab:
            qualities.extend(['dim', 'aug'])

        if '6' in self.vocab:
            qualities.extend(['min6', 'maj6'])

        if '7' in self.vocab:
            qualities.extend(['min7', 'maj7', '7', 'dim7', 'hdim7', 'minmaj7'])

        if 's' in self.vocab:
            qualities.extend(['sus2', 'sus4'])

        labels = [self.nochord]

        for chord in product(PITCHES, qualities):
            labels.append('{}:{}'.format(*chord))

        return labels

    def simplify(self, chord):
        '''Simplify a chord string down to the vocabulary space

        Parameters
        ----------
        chord : str
            A chord label

        Returns
        -------
        simplified : str
            Either `'ROOT:quality'` using the names in `PITCHES` and
            `QUALITIES`, or the no-chord symbol when the masked quality
            has no entry in `QUALITIES`.
        '''
        # Drop inversions
        chord = re.sub(r'/.*$', r'', chord)
        # Drop any additional or suppressed tones
        chord = re.sub(r'\(.*?\)', r'', chord)
        # Drop dangling : indicators
        chord = re.sub(r':$', r'', chord)

        # Encode the chord
        root, pitches, _ = mir_eval.chord.encode(chord)

        # Build the query
        # To map the binary vector pitches down to bit masked integer,
        # we just dot against powers of 2.  The vector is reversed so that
        # its first entry lands on the most significant bit.
        P = 2**np.arange(12, dtype=int)
        query = self.mask_ & pitches[::-1].dot(P)

        if query not in QUALITIES:
            # TODO: check for non-zero pitches here
            return self.nochord

        return '{}:{}'.format(PITCHES[root], QUALITIES[query])

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to chord-tag encoding

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['chord'] : np.ndarray, shape=(n, n_labels)
                A time-varying binary encoding of the chords
        '''

        intervals, values = ann.data.to_interval_values()

        # Binarize one simplified label at a time; each call contributes
        # the rows of its result to the running list.
        chords = []
        for v in values:
            chords.extend(self.encoder.transform([self.simplify(v)]))

        chords = np.asarray(chords)

        # multi=False: each interval overwrites its frames rather than
        # accumulating on top of earlier intervals.
        target = self.encode_intervals(duration, intervals, chords,
                                       multi=False)

        return {'chord': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation

        Parameters
        ----------
        encoded : np.ndarray
            The encoded chord tags, as produced under the `'chord'` key
            of `transform_annotation`

        duration : number or None
            Passed through to `decode_intervals` and to the new annotation

        Returns
        -------
        ann : jams.Annotation
            An annotation with one observation per decoded label
        '''

        ann = jams.Annotation(self.namespace, duration=duration)

        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration):
            value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

            for vd in value_dec:
                ann.append(time=start, duration=end-start, value=vd)

        return ann
16 changes: 16 additions & 0 deletions tests/test_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,19 @@ def test_decode_simplechord(sr, hop_length, ann_chord):
data2 = tc.transform_annotation(inverse, ann_chord.duration)

assert np.allclose(data['pitch'], data2['pitch'])


def test_decode_chordtag(sr, hop_length, ann_chord):
    '''Round-trip check for the chord-tag transformer.

    The annotation is encoded, decoded back into an annotation, and then
    encoded a second time; both encodings must agree.
    '''
    transformer = pumpp.task.ChordTagTransformer('chord', vocab='3567s',
                                                 hop_length=hop_length,
                                                 sr=sr)
    track_duration = ann_chord.duration

    # First pass: annotation -> tag matrix
    first_pass = transformer.transform_annotation(ann_chord, track_duration)

    # Tag matrix -> annotation -> tag matrix again
    recovered = transformer.inverse(first_pass['chord'],
                                    duration=track_duration)
    second_pass = transformer.transform_annotation(recovered, track_duration)

    assert np.allclose(first_pass['chord'], second_pass['chord'])

0 comments on commit bfcbddf

Please sign in to comment.