In [1]:
import json
import pretty_midi as pm
import pandas as pd
import numpy as np
from pathlib import Path
import sys

from alignment import (find_match, get_midi_notes, normalize_midi_notes,
                       notes_are_similar)

In [2]:
# MAESTRO metadata table, read from the JSON index that ships with the dataset
# (published by Google).
maestro = pd.read_json('maestro-v2.0.0.json')

# VirtuosoNet metadata table, produced beforehand by parsing the folders of the
# VirtuosoNet dataset and stored as a pickle.
# NOTE(review): unpickling can execute arbitrary code - only load a
# performance_dataframe.pkl that you generated yourself or otherwise trust.
virtuoso_net = pd.read_pickle("performance_dataframe.pkl")

In [3]:
# Preview the first rows of the MAESTRO metadata (composer, title, split, file names, duration).
maestro.head()

Unnamed: 0,canonical_composer,canonical_title,split,year,midi_filename,audio_filename,duration
0,Alban Berg,Sonata Op. 1,train,2018,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,698.66116
1,Alban Berg,Sonata Op. 1,train,2008,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,759.518471
2,Alban Berg,Sonata Op. 1,train,2017,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,464.649433
3,Alexander Scriabin,"24 Preludes Op. 11, No. 13-24",train,2004,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,872.640588
4,Alexander Scriabin,"3 Etudes, Op. 65",validation,2006,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,397.857508


In [4]:
# Preview the first rows of the VirtuosoNet metadata (author, folder, performance and score paths).
virtuoso_net.head()

Unnamed: 0,author,folder,midi2midi_alignment_path,performed_midi_path,performer,score2midi_alignment,score_midi_path,score_xml_path,title
0,Bach,Bach/Fugue/bwv_846,Bach/Fugue/bwv_846/Shi05_infer_corresp.txt,Bach/Fugue/bwv_846/Shi05.mid,Shi05,Bach/Fugue/bwv_846/Shi05_infer_match.txt,Bach/Fugue/bwv_846/midi_cleaned.mid,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Fugue_bwv_846
1,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Denisova06_infer_corresp.txt,Bach/Fugue/bwv_848/Denisova06.mid,Denisova06,Bach/Fugue/bwv_848/Denisova06_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
2,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Lee01_infer_corresp.txt,Bach/Fugue/bwv_848/Lee01.mid,Lee01,Bach/Fugue/bwv_848/Lee01_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
3,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/LeeSH01_infer_corresp.txt,Bach/Fugue/bwv_848/LeeSH01.mid,LeeSH01,Bach/Fugue/bwv_848/LeeSH01_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
4,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Lin04_infer_corresp.txt,Bach/Fugue/bwv_848/Lin04.mid,Lin04,Bach/Fugue/bwv_848/Lin04_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848


## Example alignment

In [5]:
# The same search is available from the command line through alignment.py
# ("python alignment.py -h" prints the available options).
# The settings below reproduce "python alignment.py -c Haydn -v".

# Argument -md maestro: base folder of the MAESTRO dataset, which must be
# downloaded from https://magenta.tensorflow.org/datasets/maestro
MAESTRO_DIR = Path("./maestro")

# Argument -vnd .: base folder of the VirtuosoNet dataset
VIRTUOSO_DIR = Path("./")

# Argument -c Haydn: VirtuosoNet composer to search for
# (None searches all VirtuosoNet composers)
COMPOSER = 'Haydn'

# Flag -e: also search MAESTRO composers whose names do not match
EXHAUSTIVE = False

# Flag -v: print progress and results
VERBOSE = True

# Argument -o correspondence.json: file the results are written to
OUTPUT = Path('correspondence.json')

In [6]:
# Restrict the VirtuosoNet table to the requested composer (case-insensitive
# comparison on the `author` column). With COMPOSER set to None the whole
# table is kept.
v_net = virtuoso_net

if COMPOSER is not None:
    wanted_author = COMPOSER.lower()
    author_matches = virtuoso_net.author.str.lower() == wanted_author
    v_net = pd.DataFrame(v_net.loc[author_matches])

    if v_net.empty:
        print(f'Error: No composer matching "{COMPOSER}" found in VirtuosoNet.',
              file=sys.stderr)
        if VERBOSE:
            # As a hint, list every available author with its row count.
            print('Possible composers are:', file=sys.stderr)
            print(virtuoso_net.groupby('author').size(), file=sys.stderr)

In [7]:
# Shared state that is handed to find_match during the search:
#   vnet_notes    - VirtuosoNet note data; not pre-loaded, only stored here
#                   once a piece is actually needed
#   maestro_notes - MAESTRO note data (kept in a dict because DataFrames are
#                   not good for holding lists)
#   matches       - result storage
vnet_notes, maestro_notes, matches = {}, {}, {}

In [8]:
# First pass: compare each VirtuosoNet piece only against the MAESTRO pieces
# whose composer name contains the VirtuosoNet composer name.
# Grouping by author keeps one code path whether or not COMPOSER was given.
for composer, group in v_net.groupby('author'):
    if VERBOSE:
        print(f'Searching for matches to composer "{composer}", in closely-'
              'matching MAESTRO composers.')
        print('The first piece will take longer as it must compute note data.')

    # Case-insensitive substring test on the MAESTRO composer names.
    same_composer = maestro.canonical_composer.str.lower().str.contains(
        composer.lower()
    )
    filtered_maestro = maestro.loc[same_composer]

    # find_match is invoked once per VirtuosoNet row; the shared dicts are
    # passed in so that results accumulate across rows and groups.
    search_args = (filtered_maestro, vnet_notes, maestro_notes, matches)
    group.apply(find_match, axis=1, args=search_args,
                vnet_base=VIRTUOSO_DIR, maestro_base=MAESTRO_DIR,
                verbose=VERBOSE)

Searching for matches to composer "Haydn", in closely-matching MAESTRO composers.
The first piece will take longer as it must compute note data.
Searching for matches to piece "Haydn/Keyboard_Sonatas/31-1/Masycheva01.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/31-1/SCHU02.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/31-1/Song05.mid"
    Match found: "2008/MIDI-Unprocessed_13_R1_2008_01-04_ORIG_MID--AUDIO_13_R1_2008_wav--2.midi"
Searching for matches to piece "Haydn/Keyboard_Sonatas/31-1/Zagalskaia02.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/32-1/Pavlovic02.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/32-1/SUDBIN01.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/32-1_no_repeat/Goldberg01.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/32-1_no_repeat/Guzman01.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/32-1_no_repeat/MCVEY01.mid"
Searching for matches to piece "Haydn/Keyboard_Sonatas/3

In [9]:
# Extended search: compare the VirtuosoNet pieces that are still unmatched
# against the MAESTRO pieces by non-matching composers.
if EXHAUSTIVE:
    if VERBOSE:
        print('Exhaustively searching MAESTRO for any unmatched pieces.')
        print('The first piece will take longer as it must compute note data.')

    # Select the unmatched rows once, before this pass adds new matches.
    v_net_filtered = v_net.loc[~v_net.index.isin(matches)]

    # Group by author so the MAESTRO filter is built from each piece's own
    # composer. Relying on a `composer` value left behind by a previous loop
    # would apply one composer's filter to every piece when several composers
    # are searched, and would raise NameError when that loop never ran.
    for composer, group in v_net_filtered.groupby('author'):
        # This keeps only the UNmatched composers (notice the '~')
        filtered_maestro = maestro.loc[
            ~maestro.canonical_composer.str.lower().str.contains(composer.lower())
        ]
        group.apply(find_match, axis=1, args=(filtered_maestro, vnet_notes,
                                              maestro_notes, matches),
                    vnet_base=VIRTUOSO_DIR, maestro_base=MAESTRO_DIR,
                    verbose=VERBOSE)

In [10]:
# Translate the index-based matches into file names:
# VirtuosoNet performance path -> list of matched MAESTRO MIDI file names.
name_matches = {}
for vnet_idx, maestro_idxs in matches.items():
    vnet_path = v_net.loc[vnet_idx, 'performed_midi_path']
    name_matches[vnet_path] = [maestro.loc[m_idx, 'midi_filename']
                               for m_idx in maestro_idxs]

# VirtuosoNet pieces with more than one match, and those with none at all.
multiple_vnet = [path for path, names in name_matches.items() if len(names) > 1]
was_matched = v_net.index.isin(matches)
unmatched_vnet = list(v_net.loc[~was_matched, 'performed_midi_path'])

# Count how often each MAESTRO piece was matched. Only the pieces that were
# actually searched (the keys of maestro_notes) are considered.
maestro_match_counts = dict.fromkeys(maestro_notes, 0)
for idx in (m_idx for idx_list in matches.values() for m_idx in idx_list):
    maestro_match_counts[idx] += 1

# Split the searched MAESTRO pieces into multiply matched and never matched.
multiple_maestro = []
unmatched_maestro = []
for idx, count in maestro_match_counts.items():
    if count > 1:
        multiple_maestro.append(maestro.loc[idx, 'midi_filename'])
    elif count == 0:
        unmatched_maestro.append(maestro.loc[idx, 'midi_filename'])

In [11]:
# Report the full match table followed by one section per summary list.
if VERBOSE:
    print(f'ALL MATCHES ({len(name_matches)}):')
    print(name_matches)

    summary_sections = (
        ('Multiply matched VirtuosoNet pieces', multiple_vnet),
        ('Unmatched VirtuosoNet pieces', unmatched_vnet),
        ('Multiply matched MAESTRO pieces', multiple_maestro),
        ('Unmatched MAESTRO pieces', unmatched_maestro),
    )
    for heading, entries in summary_sections:
        # A blank line separates each section from the previous output.
        print()
        print(f'{heading} ({len(entries)}):')
        print('\n'.join(entries))

ALL MATCHES (15):
{'Haydn/Keyboard_Sonatas/31-1/Song05.mid': ['2008/MIDI-Unprocessed_13_R1_2008_01-04_ORIG_MID--AUDIO_13_R1_2008_wav--2.midi'], 'Haydn/Keyboard_Sonatas/39-1/Yarden07.mid': ['2006/MIDI-Unprocessed_23_R1_2006_01-05_ORIG_MID--AUDIO_23_R1_2006_02_Track02_wav.midi'], 'Haydn/Keyboard_Sonatas/46-1/WongWY02.mid': ['2015/MIDI-Unprocessed_R1_D2-13-20_mid--AUDIO-from_mp3_19_R1_2015_wav--2.midi'], 'Haydn/Keyboard_Sonatas/46-1/YuP03.mid': ['2017/MIDI-Unprocessed_042_PIANO042_MID--AUDIO-split_07-06-17_Piano-e_1-02_wav--3.midi'], 'Haydn/Keyboard_Sonatas/48-1/GarritsonL02.mid': ['2014/MIDI-UNPROCESSED_21-22_R1_2014_MID--AUDIO_22_R1_2014_wav--1.midi'], 'Haydn/Keyboard_Sonatas/48-1/PrjevalskayaM02.mid': ['2014/MIDI-UNPROCESSED_06-08_R1_2014_MID--AUDIO_08_R1_2014_wav--1.midi'], 'Haydn/Keyboard_Sonatas/50-1_no_repeat/EVSTIO01.mid': ['2018/MIDI-Unprocessed_Recital8_MID--AUDIO_08_R1_2018_wav--1.midi'], 'Haydn/Keyboard_Sonatas/50-1_no_repeat/GalantM02.mid': ['2018/MIDI-Unprocessed_Recital8_MI

In [12]:
# Collect every result list under its own key and save them to OUTPUT as
# indented JSON with sorted keys.
results = {
    'matches': name_matches,
    'multiple_vnet': multiple_vnet,
    'unmatched_vnet': unmatched_vnet,
    'multiple_maestro': multiple_maestro,
    'unmatched_maestro': unmatched_maestro,
}

# Create the destination folder first in case it does not exist yet.
OUTPUT.parent.mkdir(parents=True, exist_ok=True)
with OUTPUT.open(mode='w') as out_file:
    json.dump(results, out_file, indent=4, sort_keys=True)

In [13]:
# Verify that output was written by echoing the file back.
with OUTPUT.open(mode='r') as file:
    for line in file:
        # Lines read from the file keep their own trailing newline, so stop
        # print() from adding a second one (which double-spaces the output).
        print(line, end='')

{

    "matches": {

        "Haydn/Keyboard_Sonatas/31-1/Song05.mid": [

            "2008/MIDI-Unprocessed_13_R1_2008_01-04_ORIG_MID--AUDIO_13_R1_2008_wav--2.midi"

        ],

        "Haydn/Keyboard_Sonatas/39-1/Yarden07.mid": [

            "2006/MIDI-Unprocessed_23_R1_2006_01-05_ORIG_MID--AUDIO_23_R1_2006_02_Track02_wav.midi"

        ],

        "Haydn/Keyboard_Sonatas/46-1/WongWY02.mid": [

            "2015/MIDI-Unprocessed_R1_D2-13-20_mid--AUDIO-from_mp3_19_R1_2015_wav--2.midi"

        ],

        "Haydn/Keyboard_Sonatas/46-1/YuP03.mid": [

            "2017/MIDI-Unprocessed_042_PIANO042_MID--AUDIO-split_07-06-17_Piano-e_1-02_wav--3.midi"

        ],

        "Haydn/Keyboard_Sonatas/48-1/GarritsonL02.mid": [

            "2014/MIDI-UNPROCESSED_21-22_R1_2014_MID--AUDIO_22_R1_2014_wav--1.midi"

        ],

        "Haydn/Keyboard_Sonatas/48-1/PrjevalskayaM02.mid": [

            "2014/MIDI-UNPROCESSED_06-08_R1_2014_MID--AUDIO_08_R1_2014_wav--1.midi"

        ],

        "Haydn