In [1]:
# Read dataset

import gzip
import json

# Decompress the gzipped JSON dump and parse it into `dataset`.
# ('rb' is the explicit spelling of gzip.open's 'r'; the JSON parser accepts
# the raw bytes directly.)
with gzip.open('Hooktheory.json.gz', 'rb') as compressed:
    dataset = json.loads(compressed.read())

# Report how many top-level entries were loaded.
num_entries = len(dataset)
print(num_entries)

26175


In [2]:
# Filter dataset: keep only TRAIN-split entries that are tagged with both
# AUDIO_AVAILABLE and MELODY and are NOT tagged with TEMPO_CHANGES.

train_set = {}
for entry_id, entry in dataset.items():
    # Check the split first so `tags` is only read for TRAIN entries.
    if entry['split'] != 'TRAIN':
        continue
    if 'AUDIO_AVAILABLE' not in entry['tags']:
        continue
    if 'MELODY' not in entry['tags']:
        continue
    if 'TEMPO_CHANGES' in entry['tags']:
        continue
    train_set[entry_id] = entry

print(len(train_set))

16629


In [3]:
# Inspect one: pull a single entry out of the filtered set by its key and
# pretty-print it as indented JSON.

example_id = 'kwxAaOrYxKG'
example = train_set[example_id]
example_json = json.dumps(example, indent=2)
print(example_json)

{
  "tags": [
    "NO_SWING",
    "AUDIO_AVAILABLE",
    "USER_ALIGNMENT",
    "REFINED_ALIGNMENT",
    "MELODY",
    "HARMONY"
  ],
  "split": "TRAIN",
  "hooktheory": {
    "id": "kwxAaOrYxKG",
    "artist": "cool-and-new-web-comic",
    "song": "plague",
    "annotators": [
      "StarlightCalliope",
      "Vaz123"
    ],
    "urls": {
      "artist": "https://www.hooktheory.com/theorytab/artists/c/cool-and-new-web-comic",
      "song": "https://www.hooktheory.com/theorytab/view/cool-and-new-web-comic/plague",
      "clip": "https://hookpad.hooktheory.com/?idOfSong=kwxAaOrYxKG"
    }
  },
  "youtube": {
    "id": "CdyqtJXjJa0",
    "url": "https://www.youtube.com/watch?v=CdyqtJXjJa0",
    "duration": 144.6371201814059
  },
  "alignment": {
    "swing": "STRAIGHT",
    "user": {
      "beats": [
        0,
        32
      ],
      "times": [
        0.0,
        14.817553598332898
      ]
    },
    "refined": {
      "beats": [
        0,
        1,
        2,
        3,
        4,

In [4]:
# Parse alignment

import numpy as np
from scipy.interpolate import interp1d

# Build a piecewise-linear map from beat index to time using the refined
# alignment's (beat, time) pairs; beats outside the annotated range are
# linearly extrapolated instead of raising.
refined_alignment = example['alignment']['refined']
beat_to_time_fn = interp1d(
    x=refined_alignment['beats'],
    y=refined_alignment['times'],
    kind='linear',
    fill_value='extrapolate')

# The annotated segment runs from beat 0 to beat `num_beats`.
start_time = beat_to_time_fn(0)
num_beats = example['annotations']['num_beats']
end_time = beat_to_time_fn(num_beats)
print(start_time, end_time)

0.11 14.88


In [5]:
# Interpret annotation as MIDI

# Octave offsets; each is multiplied by 12 semitones when a pitch class is
# turned into a MIDI pitch for the chord and melody tracks.
CHORD_OCTAVE = 4
MELODY_OCTAVE = 5

import pretty_midi

midi = pretty_midi.PrettyMIDI()

# Create click track: one drum note per beat (including the closing beat at
# index `num_beats`), each lasting until the next beat. Downbeats, as given by
# the first meter entry, get a louder velocity and a different drum pitch.
click = pretty_midi.Instrument(program=0, is_drum=True)
midi.instruments.append(click)
beats_per_bar = example['annotations']['meters'][0]['beats_per_bar']
for beat in range(num_beats + 1):
    if beat % beats_per_bar == 0:
        click_velocity, click_pitch = 100, 37
    else:
        click_velocity, click_pitch = 75, 31
    click.notes.append(pretty_midi.Note(
        velocity=click_velocity,
        pitch=click_pitch,
        start=beat_to_time_fn(beat),
        end=beat_to_time_fn(beat + 1)))

# Create harmony track: every chord becomes a stack of simultaneous notes
# that all share the chord's onset/offset (converted from beats to time).
harmony = pretty_midi.Instrument(program=24)  # Acoustic Guitar (nylon)
midi.instruments.append(harmony)
for chord in example['annotations']['harmony']:
    # Start at the root and add each interval onto the previous pitch, so the
    # intervals are treated as successive steps rather than offsets from root.
    running_pitch = chord['root_pitch_class']
    chord_pitches = [running_pitch]
    for step in chord['root_position_intervals']:
        running_pitch += step
        chord_pitches.append(running_pitch)

    harmony.notes.extend(
        pretty_midi.Note(
            velocity=67,
            pitch=semitone + CHORD_OCTAVE * 12,
            start=beat_to_time_fn(chord['onset']),
            end=beat_to_time_fn(chord['offset']))
        for semitone in chord_pitches)

# Create melody track: one note per melody annotation, pitched from its
# pitch class plus its octave (shifted by MELODY_OCTAVE), timed via the
# beat-to-time map.
melody = pretty_midi.Instrument(program=0)
midi.instruments.append(melody)
for melody_note in example['annotations']['melody']:
    semitone = melody_note['pitch_class']
    octave_index = MELODY_OCTAVE + melody_note['octave']
    melody.notes.append(pretty_midi.Note(
        velocity=100,
        pitch=semitone + octave_index * 12,
        start=beat_to_time_fn(melody_note['onset']),
        end=beat_to_time_fn(melody_note['offset'])))

# Persist all three tracks (click, harmony, melody) to disk.
midi.write('annotations.midi')

In [6]:
# Retrieve and decode audio (this one is CC-licensed)
# Thanks JohnJRenns https://coolandnewwebcomic.bandcamp.com/track/plague !

import librosa
from IPython.display import display, Audio

!youtube-dl --format bestaudio/best --output 'audio' {example['youtube']['url']}
audio, sr = librosa.load('audio', sr=None, mono=True, offset=start_time, duration=end_time - start_time)

[youtube] CdyqtJXjJa0: Downloading webpage
[youtube] CdyqtJXjJa0: Downloading android player API JSON
[youtube] CdyqtJXjJa0: Downloading MPD manifest
[youtube] CdyqtJXjJa0: Downloading MPD manifest
[info] CdyqtJXjJa0: Downloading 1 format(s): 251
[download] audio has already been downloaded
[K[download] 100% of 2.39MiB




In [7]:
# Synthesize aligned preview: render the MIDI at the recording's sample rate,
# cut it to the same window as the decoded audio, and play both together as a
# two-channel clip (recording in one channel, synthesized annotation in the other).

first_sample = round(start_time * sr)
target_length = audio.shape[0]

# Drop everything before the segment start, then cap at the audio's length.
annotations_audio = midi.fluidsynth(fs=sr)[first_sample:][:target_length]

# If the synthesized audio came out shorter, zero-pad the tail so both
# channels have exactly the same number of samples.
missing_samples = target_length - annotations_audio.shape[0]
if missing_samples > 0:
    annotations_audio = np.pad(annotations_audio, [(0, missing_samples)])

display(Audio([audio, annotations_audio], rate=sr))