In [1]:
from pathlib import Path

import midi_score

def get_midi(audio_path, output_midi_path):
    """Transcribe an audio file and return the note sequence read back from it.

    The audio is loaded as mono at ``audio_midi.sample_rate`` and handed to a
    ``PianoTranscription`` model, on CUDA when torch reports it is available
    and on the CPU otherwise. The wall-clock duration of the ``transcribe``
    call is printed.

    Args:
        audio_path: Path of the audio file passed to ``load_audio``.
        output_midi_path: Forwarded to ``transcribe`` (presumably the file the
            MIDI is written to -- confirm against ``audio_midi``).

    Returns:
        ``midi_score.read_note_sequence(output_midi_path)`` when
        ``output_midi_path`` is truthy, otherwise ``None``.
    """
    import torch
    import time
    from audio_midi import PianoTranscription, load_audio, sample_rate

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # load_audio returns a pair; only its first element is used here.
    waveform, _ = load_audio(audio_path, sr=sample_rate, mono=True)
    model = PianoTranscription(device=device)

    started_at = time.time()
    model.transcribe(waveform, output_midi_path)
    elapsed = time.time() - started_at
    print("Transcribe time: {:.3f} s".format(elapsed))

    # Without an output path there is nothing to read back.
    if not output_midi_path:
        return None
    return midi_score.read_note_sequence(output_midi_path)

def get_beats(audio_path):
    """Estimate beats/downbeats for the audio file at ``audio_path``.

    Beat activations come from BeatNet (``model=1``) via
    ``activation_extractor_online`` and are decoded by madmom's
    ``DBNDownBeatTrackingProcessor`` configured for 2, 3 or 4 beats per bar
    at 50 fps.

    Args:
        audio_path: Path of the audio file passed to BeatNet.

    Returns:
        Whatever the downbeat tracker returns for the activations.
    """
    from BeatNet.BeatNet import BeatNet
    from madmom.features.downbeats import DBNDownBeatTrackingProcessor

    activations = BeatNet(model=1).activation_extractor_online(audio_path)
    # Using DBN offline inference to infer beat/downbeats
    decode = DBNDownBeatTrackingProcessor(beats_per_bar=[2, 3, 4], fps=50)
    return decode(activations)

def get_keysig(midi_notes):
    """Run ``midi_score.RNNKeySignatureProcessor`` on ``midi_notes``.

    A fresh processor is constructed on every call.

    Args:
        midi_notes: Note data passed unchanged to the processor's ``process``.

    Returns:
        The value returned by ``process``.
    """
    return midi_score.RNNKeySignatureProcessor().process(midi_notes)

def get_hand_parts(midi_notes):
    """Run ``midi_score.RNNHandPartProcessor`` on ``midi_notes``.

    A fresh processor is constructed on every call.

    Args:
        midi_notes: Note data passed unchanged to the processor's ``process``.

    Returns:
        The value returned by ``process``.
    """
    return midi_score.RNNHandPartProcessor().process(midi_notes)


# Input recording and the cached MIDI transcription derived from it.
AUDIO_PATH = "example/sonatine.mp3"
MIDI_PATH = "example/sonatine.midi"

# get_midi runs the transcription model, so reuse the MIDI file when it is
# already on disk and only transcribe when it is missing. Both branches yield
# the result of midi_score.read_note_sequence(MIDI_PATH).
if Path(MIDI_PATH).exists():
    midi = midi_score.read_note_sequence(MIDI_PATH)
else:
    midi = get_midi(AUDIO_PATH, MIDI_PATH)

# Beats are extracted from the audio; key and hand parts from the MIDI notes.
beats = get_beats(AUDIO_PATH)
key_change = get_keysig(midi)
hand_parts = get_hand_parts(midi)

In [2]:
# Assemble a score from the beats, notes, hand parts and key changes computed
# in the previous cell, then render it to a file.
builder = midi_score.MusicXMLBuilder(beats)
# Notes and hand-part labels are passed as the results of their .numpy() calls.
builder.add_notes(midi.numpy(), hand_parts.numpy())
builder.add_key_changes(key_change)
# NOTE(review): diff_size and log_bin_size are magic numbers whose meaning is
# defined by MusicXMLBuilder.infer_bpm_changes -- confirm there before tuning.
builder.infer_bpm_changes(diff_size=2, log_bin_size=0.03)
# Output path; presumably MusicXML given the builder's name -- TODO confirm.
builder.render("example/sonatine.xml")