In [2]:
import vamp
import librosa

What Vamp plugins do we have installed?

In [3]:
vamp.list_plugins()

['bbc-vamp-plugins:bbc-energy',
 'bbc-vamp-plugins:bbc-intensity',
 'bbc-vamp-plugins:bbc-peaks',
 'bbc-vamp-plugins:bbc-rhythm',
 'bbc-vamp-plugins:bbc-spectral-contrast',
 'bbc-vamp-plugins:bbc-spectral-flux',
 'bbc-vamp-plugins:bbc-speechmusic-segmenter',
 'qm-vamp-plugins:qm-adaptivespectrogram',
 'qm-vamp-plugins:qm-barbeattracker',
 'qm-vamp-plugins:qm-chromagram',
 'qm-vamp-plugins:qm-constantq',
 'qm-vamp-plugins:qm-dwt',
 'qm-vamp-plugins:qm-keydetector',
 'qm-vamp-plugins:qm-mfcc',
 'qm-vamp-plugins:qm-onsetdetector',
 'qm-vamp-plugins:qm-segmenter',
 'qm-vamp-plugins:qm-similarity',
 'qm-vamp-plugins:qm-tempotracker',
 'qm-vamp-plugins:qm-tonalchange',
 'qm-vamp-plugins:qm-transcription']

OK, let's load up a file using `librosa`

In [55]:
AUDIOFILE="C:\\Users\\mike\\Downloads\\01 - When I'm Gone.mp3"

In [80]:
duration = librosa.get_duration(path=AUDIOFILE)

data, rate = librosa.load(AUDIOFILE, sr=None, mono=False)
if data.ndim == 1:
    channels = 1
    n_samples = len(data)
else:
    channels, n_samples = data.shape

How much data do we have, and what was the sample rate?

In [81]:
rate, duration, channels, n_samples

(44100, 248.71290249433108, 2, 10968239)

In [134]:
librosa.feature.tempo(y=data, sr=rate)

array([[123.046875],
       [123.046875]])

## Tempo detection

How does the tempo detection plugin do?

In [100]:
tempos = vamp.collect(data, rate*2, "qm-vamp-plugins:qm-tempotracker")
tempos=tempos['list']
print(len(tempos))
print(tempos[:16])

159
[{'timestamp':  0.603718821, 'label': '78.30 bpm'}, {'timestamp':  1.369977324, 'label': '86.13 bpm'}, {'timestamp':  2.066575963, 'label': '86.13 bpm'}, {'timestamp':  2.763174603, 'label': '77.13 bpm'}, {'timestamp':  3.541043084, 'label': '77.13 bpm'}, {'timestamp':  4.318911564, 'label': '76.00 bpm'}, {'timestamp':  5.108390022, 'label': '77.13 bpm'}, {'timestamp':  5.886258503, 'label': '74.90 bpm'}, {'timestamp':  6.687346938, 'label': '76.00 bpm'}, {'timestamp':  7.476825397, 'label': '78.30 bpm'}, {'timestamp':  8.243083900, 'label': '77.13 bpm'}, {'timestamp':  9.020952381, 'label': '76.00 bpm'}, {'timestamp':  9.810430839, 'label': '79.51 bpm'}, {'timestamp':  10.565079365, 'label': '79.51 bpm'}, {'timestamp':  11.319727891, 'label': '71.78 bpm'}, {'timestamp':  12.155646258, 'label': '66.26 bpm'}]


For some reason, we need to pass *double* the sample rate reported by librosa. I think this is something to do with the fact that the QM Vamp analysers work on a single channel of audio, but I'm not sure.

In [83]:
[(x['timestamp'],x['label']) for x in tempos]

[( 0.603718821, '78.30 bpm'),
 ( 1.369977324, '86.13 bpm'),
 ( 2.066575963, '86.13 bpm'),
 ( 2.763174603, '77.13 bpm'),
 ( 3.541043084, '77.13 bpm'),
 ( 4.318911564, '76.00 bpm'),
 ( 5.108390022, '77.13 bpm'),
 ( 5.886258503, '74.90 bpm'),
 ( 6.687346938, '76.00 bpm'),
 ( 7.476825397, '78.30 bpm'),
 ( 8.243083900, '77.13 bpm'),
 ( 9.020952381, '76.00 bpm'),
 ( 9.810430839, '79.51 bpm'),
 ( 10.565079365, '79.51 bpm'),
 ( 11.319727891, '71.78 bpm'),
 ( 12.155646258, '66.26 bpm'),
 ( 13.061224489, '77.13 bpm'),
 ( 13.839092970, '68.91 bpm'),
 ( 14.709841270, '65.42 bpm'),
 ( 15.627029478, '68.00 bpm'),
 ( 16.509387755, '68.91 bpm'),
 ( 17.380136054, '78.30 bpm'),
 ( 18.146394557, '74.90 bpm'),
 ( 18.947482993, '74.90 bpm'),
 ( 19.748571428, '73.83 bpm'),
 ( 20.561269841, '73.83 bpm'),
 ( 21.373968254, '80.75 bpm'),
 ( 22.117006802, '72.79 bpm'),
 ( 22.941315192, '76.00 bpm'),
 ( 23.730793650, '76.00 bpm'),
 ( 24.520272109, '76.00 bpm'),
 ( 25.309750567, '77.13 bpm'),
 ( 26.087619047, '77.

This isn't bad: we know the BPM is around 76-77.

In [101]:
tempo_vals = [float(x['label'].rstrip(" bpm")) for x in tempos if x['label']]
mean_tempo = sum(tempo_vals) / len(tempo_vals)
mean_tempo

76.95550632911386

## Bars and Beats

How about bar and beat detection?

In [107]:
beats = vamp.collect(data, rate, "qm-vamp-plugins:qm-barbeattracker")
beats = beats["list"]
print(len(beats))
print(beats[:16])

538
[{'timestamp':  0.034829932, 'label': '3'}, {'timestamp':  0.603718821, 'label': '4'}, {'timestamp':  1.207437642, 'label': '1'}, {'timestamp':  1.660226757, 'label': '2'}, {'timestamp':  2.171065760, 'label': '3'}, {'timestamp':  2.751564626, 'label': '4'}, {'timestamp':  3.355283447, 'label': '1'}, {'timestamp':  3.924172336, 'label': '2'}, {'timestamp':  4.435011338, 'label': '3'}, {'timestamp':  4.957460317, 'label': '4'}, {'timestamp':  5.537959184, 'label': '1'}, {'timestamp':  6.083628118, 'label': '2'}, {'timestamp':  6.629297052, 'label': '3'}, {'timestamp':  7.174965986, 'label': '4'}, {'timestamp':  7.709024943, 'label': '1'}, {'timestamp':  8.254693878, 'label': '2'}]


Bars occur where the "label" value is "1", so let's pull those out:

In [108]:
bars = [x['timestamp'] for x in beats if x['label'] == "1"]
bars

[ 1.207437642,
  3.355283447,
  5.537959184,
  7.709024943,
  10.019410431,
  12.399455782,
  14.733061224,
  17.078276644,
  19.435102041,
  21.768707483,
  24.125532880,
  26.459138322,
  28.827573696,
  31.196009070,
  33.564444444,
  35.898049887,
  38.243265306,
  40.611700680,
  42.968526077,
  45.325351474,
  47.577687075,
  49.841632653,
  52.163628118,
  54.520453515,
  56.865668934,
  59.106394558,
  61.393560091,
  63.727165533,
  66.095600907,
  68.429206349,
  70.786031746,
  73.131247166,
  75.511292517,
  77.868117914,
  80.213333333,
  82.570158730,
  84.915374150,
  87.260589569,
  89.640634921,
  91.985850340,
  94.331065760,
  96.687891156,
  99.033106576,
  101.366712018,
  103.572607710,
  105.871383220,
  108.100498866,
  110.410884354,
  112.756099773,
  115.112925170,
  116.900861678,
  118.456598639,
  120.023945578,
  121.579682540,
  123.158639456,
  124.725986395,
  126.281723356,
  127.849070295,
  129.416417234,
  130.995374150,
  132.562721088,
  134.1184

### Aside: Timestamp types

Note that timestamps are a `vampyhost` type.

In [109]:
type(bars[0])

vampyhost.RealTime

In [110]:
[x for x in dir(bars[0]) if not x.startswith("_")]

['to_float', 'to_frame', 'to_string', 'values']

In [111]:
bars[0].to_frame(rate), bars[0].to_string(), bars[0].to_float(),  bars[0].values()

(53248, '1.207', 1.207437642, (1, 207437642))

### Making Transcribe markers from bar detection

We might want to create a Markers section for Transcribe from the output of the bars and beats detector.

In [112]:
def make_transcribe_bar_markers(audio_data, rate, parameters = None):
    if parameters is None:
        parameters = {}
    bars_and_beats = vamp.collect(audio_data, rate, "qm-vamp-plugins:qm-barbeattracker", parameters = parameters)['list']
    bar_markers = [(i,x['timestamp']) for i,x in enumerate(bars_and_beats) if x['label']=='1']
    print(f"HowMany,{len(bar_markers)}")
    for i, (n,m) in enumerate(bar_markers):
        v = m.values()
        if i == len(bar_markers) - 1:
            bar_length = len(bars_and_beats) - n
        else:
            bar_length = bar_markers[i+1][0] - n


        print(f"M,-1,0,{i+1},{bar_length},{v[0]//3600:01d}:{v[0]//60:02d}:{v[0]%60:02d}.{v[1]//1000}")

In [113]:
make_transcribe_bar_markers(data, rate)

HowMany,134
M,-1,0,1,4,0:00:01.207437
M,-1,0,2,4,0:00:03.355283
M,-1,0,3,4,0:00:05.537959
M,-1,0,4,4,0:00:07.709024
M,-1,0,5,4,0:00:10.19410
M,-1,0,6,4,0:00:12.399455
M,-1,0,7,4,0:00:14.733061
M,-1,0,8,4,0:00:17.78276
M,-1,0,9,4,0:00:19.435102
M,-1,0,10,4,0:00:21.768707
M,-1,0,11,4,0:00:24.125532
M,-1,0,12,4,0:00:26.459138
M,-1,0,13,4,0:00:28.827573
M,-1,0,14,4,0:00:31.196009
M,-1,0,15,4,0:00:33.564444
M,-1,0,16,4,0:00:35.898049
M,-1,0,17,4,0:00:38.243265
M,-1,0,18,4,0:00:40.611700
M,-1,0,19,4,0:00:42.968526
M,-1,0,20,4,0:00:45.325351
M,-1,0,21,4,0:00:47.577687
M,-1,0,22,4,0:00:49.841632
M,-1,0,23,4,0:00:52.163628
M,-1,0,24,4,0:00:54.520453
M,-1,0,25,4,0:00:56.865668
M,-1,0,26,4,0:00:59.106394
M,-1,0,27,4,0:01:01.393560
M,-1,0,28,4,0:01:03.727165
M,-1,0,29,4,0:01:06.95600
M,-1,0,30,4,0:01:08.429206
M,-1,0,31,4,0:01:10.786031
M,-1,0,32,4,0:01:13.131247
M,-1,0,33,4,0:01:15.511292
M,-1,0,34,4,0:01:17.868117
M,-1,0,35,4,0:01:20.213333
M,-1,0,36,4,0:01:22.570158
M,-1,0,37,4,0:01:24.915374
M

We can then take the output of this, and paste it into a Transcribe `.xsc` file. Opening the file in Transcribe will then display these markers.

Unfortunately, when we look at the result in Transcribe the bar markers that have been created here for this song are more or less bullshit - there's far too many of them and they're in all the wrong places. I suspect this is related to the BPM detection being off earlier unless the rate is doubled. Unfortunately, doubling the rate doesn't work for this analyzer. Perhaps we can adjust the parameters to get better results?

In [114]:
vamp.get_parameters_of("qm-vamp-plugins:qm-barbeattracker")

[{'identifier': 'bpb',
  'name': 'Beats per Bar',
  'description': 'The number of beats in each bar',
  'unit': '',
  'minValue': 2.0,
  'maxValue': 16.0,
  'defaultValue': 4.0,
  'isQuantized': True,
  'quantizeStep': 1.0},
 {'identifier': 'alpha',
  'name': 'Alpha',
  'description': 'Inertia - Flexibility Trade Off',
  'unit': '',
  'minValue': 0.10000000149011612,
  'maxValue': 0.9900000095367432,
  'defaultValue': 0.8999999761581421,
  'isQuantized': False},
 {'identifier': 'inputtempo',
  'name': 'Tempo Hint',
  'description': 'User-defined tempo on which to centre the tempo preference function',
  'unit': 'BPM',
  'minValue': 50.0,
  'maxValue': 250.0,
  'defaultValue': 120.0,
  'isQuantized': True,
  'quantizeStep': 1.0},
 {'identifier': 'constraintempo',
  'name': 'Constrain Tempo',
  'description': 'Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh',
  'unit': '',
  'minValue': 0.0,
  'maxValue': 1.0,
  'defaultValue': 0.0,
  'isQuant

In [115]:
make_transcribe_bar_markers(data, rate, {"inputtempo":mean_tempo, "constraintempo": 1})

HowMany,80
M,-1,0,1,4,0:00:01.207437
M,-1,0,2,4,0:00:04.144761
M,-1,0,3,4,0:00:07.430385
M,-1,0,4,4,0:00:10.530249
M,-1,0,5,4,0:00:13.548843
M,-1,0,6,4,0:00:16.683537
M,-1,0,7,4,0:00:19.876281
M,-1,0,8,4,0:00:22.964535
M,-1,0,9,4,0:00:26.76009
M,-1,0,10,4,0:00:29.59773
M,-1,0,11,4,0:00:32.600816
M,-1,0,12,4,0:00:35.700680
M,-1,0,13,4,0:00:38.812154
M,-1,0,14,4,0:00:41.946848
M,-1,0,15,4,0:00:45.116371
M,-1,0,16,4,0:00:48.239455
M,-1,0,17,4,0:00:51.397369
M,-1,0,18,4,0:00:54.520453
M,-1,0,19,4,0:00:57.678367
M,-1,0,20,4,0:01:00.801451
M,-1,0,21,4,0:01:03.947755
M,-1,0,22,4,0:01:07.70839
M,-1,0,23,4,0:01:10.193922
M,-1,0,24,4,0:01:13.340226
M,-1,0,25,4,0:01:16.474920
M,-1,0,26,4,0:01:19.632834
M,-1,0,27,4,0:01:22.744308
M,-1,0,28,4,0:01:25.902222
M,-1,0,29,4,0:01:29.36916
M,-1,0,30,4,0:01:32.160000
M,-1,0,31,4,0:01:35.317913
M,-1,0,32,4,0:01:38.440997
M,-1,0,33,4,0:01:41.598911
M,-1,0,34,4,0:01:44.710385
M,-1,0,35,4,0:01:47.879909
M,-1,0,36,4,0:01:51.2993
M,-1,0,37,4,0:01:54.137687
M,-1,

This is much better! So if we have an idea of the tempo in advance we can give it as a hint. We can use the tempo tracker to get that hint.

In [116]:

vamp.get_outputs_of("qm-vamp-plugins:qm-barbeattracker")

['beats', 'bars', 'beatcounts', 'beatsd']

Note that we can also just get only the bar markers:

In [117]:
vamp.collect(data, rate,"qm-vamp-plugins:qm-barbeattracker", output="bars", parameters={"inputtempo":mean_tempo, "constraintempo": 1} )

{'list': [{'timestamp':  1.207437642, 'label': '1'},
  {'timestamp':  4.144761905, 'label': '2'},
  {'timestamp':  7.430385488, 'label': '3'},
  {'timestamp':  10.530249433, 'label': '4'},
  {'timestamp':  13.548843537, 'label': '5'},
  {'timestamp':  16.683537415, 'label': '6'},
  {'timestamp':  19.876281179, 'label': '7'},
  {'timestamp':  22.964535147, 'label': '8'},
  {'timestamp':  26.076009070, 'label': '9'},
  {'timestamp':  29.059773243, 'label': '10'},
  {'timestamp':  32.600816327, 'label': '11'},
  {'timestamp':  35.700680272, 'label': '12'},
  {'timestamp':  38.812154195, 'label': '13'},
  {'timestamp':  41.946848073, 'label': '14'},
  {'timestamp':  45.116371882, 'label': '15'},
  {'timestamp':  48.239455782, 'label': '16'},
  {'timestamp':  51.397369615, 'label': '17'},
  {'timestamp':  54.520453515, 'label': '18'},
  {'timestamp':  57.678367347, 'label': '19'},
  {'timestamp':  60.801451247, 'label': '20'},
  {'timestamp':  63.947755102, 'label': '21'},
  {'timestamp':  

## Section Detection

There's a plugin which detects different "segments" of the song. How does that do?

In [118]:
segments = vamp.collect(data, rate, "qm-vamp-plugins:qm-segmenter")['list']
segments

[{'timestamp':  0.000000000,
  'duration':  11.600000000,
  'label': 'A',
  'values': array([1.], dtype=float32)},
 {'timestamp':  11.600000000,
  'duration':  26.800000000,
  'label': 'B',
  'values': array([2.], dtype=float32)},
 {'timestamp':  38.400000000,
  'duration':  11.800000000,
  'label': 'A',
  'values': array([1.], dtype=float32)},
 {'timestamp':  50.200000000,
  'duration':  27.400000000,
  'label': 'C',
  'values': array([3.], dtype=float32)},
 {'timestamp':  77.600000000,
  'duration':  10.600000000,
  'label': 'D',
  'values': array([4.], dtype=float32)},
 {'timestamp':  88.200000000,
  'duration':  23.200000000,
  'label': 'B',
  'values': array([2.], dtype=float32)},
 {'timestamp':  111.400000000,
  'duration':  28.400000000,
  'label': 'E',
  'values': array([5.], dtype=float32)},
 {'timestamp':  139.800000000,
  'duration':  11.800000000,
  'label': 'F',
  'values': array([6.], dtype=float32)},
 {'timestamp':  151.600000000,
  'duration':  11.600000000,
  'label': 

In [119]:
segment_starts = [(x['timestamp'], x['label']) for x in segments]
segment_starts

[( 0.000000000, 'A'),
 ( 11.600000000, 'B'),
 ( 38.400000000, 'A'),
 ( 50.200000000, 'C'),
 ( 77.600000000, 'D'),
 ( 88.200000000, 'B'),
 ( 111.400000000, 'E'),
 ( 139.800000000, 'F'),
 ( 151.600000000, 'A'),
 ( 163.200000000, 'F'),
 ( 181.200000000, 'G'),
 ( 185.400000000, 'H'),
 ( 214.200000000, 'F'),
 ( 223.400000000, 'G'),
 ( 239.800000000, 'F')]

Note how it spots similar segments, e.g. the first and third sections are thought to be similar. I wonder if doing BPM detection on the segments would be more accurate.

In [120]:
last_frames  = [x[0].to_frame(rate) for x in segment_starts[1:]]
first_frames = [0]
first_frames.extend(last_frames)
last_frames.append(len(data))
segment_boundaries = list(zip(first_frames, last_frames))
segment_boundaries


[(0, 511560),
 (511560, 1693440),
 (1693440, 2213820),
 (2213820, 3422160),
 (3422160, 3889620),
 (3889620, 4912740),
 (4912740, 6165180),
 (6165180, 6685560),
 (6685560, 7197120),
 (7197120, 7990920),
 (7990920, 8176140),
 (8176140, 9446220),
 (9446220, 9851940),
 (9851940, 10575180),
 (10575180, 2)]

In [124]:
segment_data = [data[:,f:l] for (f,l) in zip(first_frames, last_frames)]

In [125]:
def get_bpm(x):
    return float(x['label'].rstrip(" bpm"))

In [126]:
for seg_index, this_segment in zip(segment_starts, segment_data):
    segment_tempo = vamp.collect(this_segment, 2*rate, "qm-vamp-plugins:qm-tempotracker", parameters={"inputtempo":mean_tempo})['list']
    bpms = [get_bpm(x) for x in segment_tempo if 'label' in x and x['label']]
    if bpms:
        mean = sum(bpms) / len(bpms)
        variance = sum((x-mean)**2 for x in bpms) / (len(bpms)-1)
        print(f"{seg_index} {mean:.2f},{variance**0.5:.2f}")

( 11.600000000, 'B') 74.37,4.04
( 50.200000000, 'C') 76.53,0.58
( 88.200000000, 'B') 76.43,1.49
( 111.400000000, 'E') 76.53,0.58
( 163.200000000, 'F') 76.41,0.57
( 185.400000000, 'H') 76.63,0.58
( 223.400000000, 'G') 78.63,4.45


# BBC Rhythm

In [127]:
BBCR = "bbc-vamp-plugins:bbc-rhythm"


In [128]:
vamp.get_parameters_of(BBCR)

[{'identifier': 'numBands',
  'name': 'Sub-bands',
  'description': 'Number of sub-bands.',
  'unit': '',
  'minValue': 2.0,
  'maxValue': 50.0,
  'defaultValue': 7.0,
  'isQuantized': True,
  'quantizeStep': 1.0},
 {'identifier': 'threshold',
  'name': 'Threshold',
  'description': 'For peak picker.',
  'unit': '',
  'minValue': 0.0,
  'maxValue': 10.0,
  'defaultValue': 1.0,
  'isQuantized': False},
 {'identifier': 'average_window',
  'name': 'Moving average window length',
  'description': 'Length of window used for moving average.',
  'unit': 'frames',
  'minValue': 1.0,
  'maxValue': 500.0,
  'defaultValue': 200.0,
  'isQuantized': True,
  'quantizeStep': 1.0},
 {'identifier': 'peak_window',
  'name': 'Onset peak window length',
  'description': 'Length of window used for peak picking.',
  'unit': 'frames',
  'minValue': 1.0,
  'maxValue': 20.0,
  'defaultValue': 6.0,
  'isQuantized': True,
  'quantizeStep': 1.0},
 {'identifier': 'min_bpm',
  'name': 'Minimum BPM',
  'description'

In [129]:
vamp.get_outputs_of(BBCR)

['onset_curve',
 'average',
 'diff',
 'onset',
 'avg-onset-freq',
 'rhythm-strength',
 'autocor',
 'mean-correlation-peak',
 'peak-valley-ratio',
 'tempo']

In [132]:
vamp.collect(data, 2*rate, BBCR, output="tempo", parameters={"min_bpm":50})

{'list': [{'timestamp':  0.000000000,
   'label': '',
   'values': array([76.5625], dtype=float32)}]}