In [29]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pretty_midi
import librosa
import mir_eval
# import mir_eval.display
import tables
import IPython.display
import os
import json

In [22]:
# Locations of the dataset on the local filesystem
# Alternative value kept for reference: DATA_PATH = 'lmd/data/lmd_aligned'
RESULTS_PATH = 'lmd/results'
DATA_PATH = 'lmd/data/lmd_matched_mp3'
# match_scores.json is distributed with the LMD; it is read from RESULTS_PATH
SCORE_FILE = os.path.join(RESULTS_PATH, 'match_scores.json')

# Utility functions for retrieving paths
def msd_id_to_dirs(msd_id):
    """Build the nested directory prefix for an MSD ID.

    The characters at positions 2, 3 and 4 of the ID each become one
    directory level, followed by the full ID itself.
    E.g. TRABCD12345678 -> A/B/C/TRABCD12345678
    """
    shard_dirs = [msd_id[position] for position in (2, 3, 4)]
    return os.path.join(*shard_dirs, msd_id)

def msd_id_to_mp3(msd_id):
    """Return the mp3 path for an MSD ID, located under DATA_PATH."""
    mp3_name = msd_id_to_dirs(msd_id) + '.mp3'
    return os.path.join(DATA_PATH, mp3_name)

def msd_id_to_h5(h5):
    """Given an MSD ID, return the path to the corresponding h5.

    h5 is the MSD ID string (the parameter name is kept unchanged so
    existing callers keep working). The path is built under
    RESULTS_PATH/lmd_matched_h5 from the directory prefix of that ID.
    """
    # Use the argument itself; the previous body read the global `msd_id`,
    # silently ignoring whatever ID the caller passed in.
    return os.path.join(RESULTS_PATH, 'lmd_matched_h5',
                        msd_id_to_dirs(h5) + '.h5')

def get_midi_path(msd_id, midi_md5, kind):
    """Return the path of the MIDI file identified by an MSD ID and MIDI MD5.

    kind selects the 'lmd_<kind>' folder under RESULTS_PATH and should be
    one of 'matched' or 'aligned'.
    """
    collection_dir = 'lmd_{}'.format(kind)
    id_dirs = msd_id_to_dirs(msd_id)
    midi_name = midi_md5 + '.mid'
    return os.path.join(RESULTS_PATH, collection_dir, id_dirs, midi_name)

In [15]:
# Read the match scores (MSD ID -> {MIDI MD5 -> confidence score})
with open(SCORE_FILE) as f:
    scores = json.loads(f.read())
# Pick one Million Song Dataset ID out of the scores dictionary
msd_id = list(scores)[1234]
print(
    'Million Song Dataset ID {} has {} MIDI file matches:'.format(
        msd_id, len(scores[msd_id])
    )
)
# List every MIDI candidate for that ID together with its score
for midi_md5 in scores[msd_id]:
    score = scores[msd_id][midi_md5]
    print('  {} with confidence score {}'.format(midi_md5, score))

Million Song Dataset ID TRFIAIQ128F92E63F6 has 2 MIDI file matches:
  de8c3a059eade88d6560b399f7833f73 with confidence score 0.5273469231625926
  5b0c2b2ddb69956d7f2c5725eb81f417 with confidence score 0.5995753568226387


In [16]:
len(scores)

31034

In [17]:
# Count the MSD entries whose best MIDI match has a confidence score above .9.
# The best score is tested (not whichever match happens to be listed first),
# so the count does not depend on the key order of the JSON file; entries
# without any match are skipped instead of raising IndexError.
sum(1 for midi_scores in scores.values()
    if max(midi_scores.values(), default=0.) > .9)

112

In [18]:
# Keep only the MSD entries whose best MIDI match has a confidence score
# above .9. The best score is tested (not whichever match happens to be
# listed first), so the selection does not depend on the key order of the
# JSON file; entries without any match are skipped instead of raising
# IndexError.
testscores = {
    msd_key: midi_scores
    for msd_key, midi_scores in scores.items()
    if max(midi_scores.values(), default=0.) > .9
}

In [19]:
testscores

{'TRTEIGY12903CFCC9E': {'daee3a832549d786b4ea23e2b4d11a6d': 0.9722543923353655},
 'TRLZGRH12903CB7707': {'daee3a832549d786b4ea23e2b4d11a6d': 0.9874281840697164},
 'TRGYBOF128F9310DCA': {'0eeaa947af2669e9b553a25d13f29622': 0.9029154961065542},
 'TRDGOJH128F148CCDB': {'7146170cc7db3da4cf484fd1086ca9a7': 0.9898154992374425},
 'TRWALGO128F1473A9A': {'4abcfb2ff6a16cba208c872d54109159': 0.9597153885658751,
  '169e45b7b15fb639ac57f97178c8fe1c': 0.7342946877764172,
  '81af3ab7888206681dc2b3c5ea72f254': 0.7185811919089042},
 'TRCPCIP128EF344085': {'c198cf31d1257999ad0f0f2cea17eb7c': 0.9048635006646004},
 'TREGROG128F427481F': {'89f13a056667cd22ae8ff92505c5eece': 1.0000500152243248,
  '845b647dcf92b1510ea92758b1a28cc9': 1.0000465033695254},
 'TRNIEGY128F4265949': {'882d38bc3a427af0a885dbce1ad70b8d': 1.0320040132604194,
  'ae9fcc0f28ddadcdffcf324db1d29f1a': 0.7352695757183978,
  'a56fcc3981e80e055520cb8bd795b503': 0.7320381869562782,
  'ecf2e187c389d83c1596c74ef2826e62': 0.7427770667971219,
  '59

In [90]:
# NOTE(review): `matches` is not assigned by any cell above this one in the
# visible notebook (it is only bound further down), so this cell presumably
# ran against leftover kernel state — confirm it survives Restart & Run All.
# Highest confidence score among the values of the `matches` dictionary
best_match_val = max(matches.values())
print(best_match_val)
# NOTE(review): this rebinds midi_md5 to an empty list, whereas the other
# cells bind it to an MD5 string — looks like a leftover; confirm intent.
midi_md5 = []

0.9834245604091866


In [91]:
max(matches, key=matches.get)

'93f22843c14c793d32d4219a3887175a'

Good match: '93f22843c14c793d32d4219a3887175a'

In [116]:
# Grab an MSD ID and its dictionary of matches.
# Peek at the most recently inserted entry (the one popitem() would return)
# without removing it: re-running this cell then always selects the same
# song and leaves testscores intact for the other cells.
if not testscores:
    raise KeyError('testscores is empty: no MSD entry to select')
msd_id, matches = list(testscores.items())[-1]
# Grab the MIDI with the highest confidence score from the matches
midi_md5 = max(matches, key=matches.get)
score = matches[midi_md5]
# Construct the path to the aligned MIDI
aligned_midi_path = get_midi_path(msd_id, midi_md5, 'aligned')
# Load/parse the MIDI file with pretty_midi
pm = pretty_midi.PrettyMIDI(aligned_midi_path)

In [117]:
# Piano roll of the MIDI file, restricted to rows 12..95
# (7 octaves starting from C1)
piano_roll = pm.get_piano_roll()[12:96]
# Load the audio recording that belongs to this MSD entry
audio, fs = librosa.load(msd_id_to_mp3(msd_id))
# Disabled: constant-Q spectrogram, normalized for visualization
# cqt = librosa.logamplitude(librosa.cqt(audio))
# cqt = librosa.util.normalize(cqt)

In [118]:
# Retrieve the beats and downbeats from pretty_midi
# Note that the beat phase will be wrong until the first time signature change after 0s
# So, let's start beat tracking from that point.
# Fall back to 0s when the file has no time signature change after 0s;
# indexing the empty list would otherwise raise IndexError.
first_ts_after_0 = next(
    (ts.time for ts in pm.time_signature_changes if ts.time > 0.), 0.)
# Get beats from pretty_midi, supplying a start time
beats = pm.get_beats(start_time=first_ts_after_0)
# .. downbeats, too
downbeats = pm.get_downbeats(start_time=first_ts_after_0)
# Display meter on top of waveform
plt.figure(figsize=(10, 3))
# Pass the sample rate returned by librosa.load instead of relying on the
# default; the trailing ';' keeps the plot object's repr out of the output
librosa.display.waveshow(audio, sr=fs, color='green', alpha=.5);
# mir_eval.display.events(beats, base=-1, height=2, color='orange')
# mir_eval.display.events(downbeats, base=-1, height=2, color='black', lw=2);

<librosa.display.AdaptiveWaveplot at 0x14fc42a10>

In [119]:
# Synthesize one click track for the beats (default click frequency) and one
# for the downbeats (2000 Hz), each with the same number of samples as the audio
beat_clicks = librosa.clicks(times=beats, sr=fs, length=audio.shape[0])
downbeat_clicks = librosa.clicks(
    times=downbeats, sr=fs, click_freq=2000, length=audio.shape[0])
# Play the recording together with the summed click tracks
IPython.display.Audio([audio, beat_clicks + downbeat_clicks], rate=fs)

In [75]:
librosa.clicks?

Signature:
librosa.clicks(
    *,
    times: 'Optional[_SequenceLike[_FloatLike_co]]' = None,
    frames: 'Optional[_SequenceLike[_IntLike_co]]' = None,
    sr: 'float' = 22050,
    hop_length: 'int' = 512,
    click_freq: 'float' = 1000.0,
    click_duration: 'float' = 0.1,
    click: 'Optional[np.ndarray]' = None

In [76]:
fs

22050

In [86]:
msd_id, matches

('TREKLEO128F1459433',
 {'93f22843c14c793d32d4219a3887175a': 0.9834245604091866,
  '5003f23676569808496f9ebae0c73dec': 0.5767940687443286})